{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T02:09:31Z","timestamp":1742954971757,"version":"3.40.3"},"publisher-location":"Cham","reference-count":26,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030820985"},{"type":"electronic","value":"9783030820992"}],"license":[{"start":{"date-parts":[[2021,7,28]],"date-time":"2021-07-28T00:00:00Z","timestamp":1627430400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,7,28]],"date-time":"2021-07-28T00:00:00Z","timestamp":1627430400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-030-82099-2_39","type":"book-chapter","created":{"date-parts":[[2021,12,18]],"date-time":"2021-12-18T23:16:35Z","timestamp":1639869395000},"page":"436-446","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Fuzzy Baselines to Stabilize Policy Gradient Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Gabriela","family":"Surita","sequence":"first","affiliation":[]},{"given":"Andre","family":"Lemos","sequence":"additional","affiliation":[]},{"given":"Fernando","family":"Gomide","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,7,28]]},"reference":[{"key":"39_CR1","doi-asserted-by":"publisher","unstructured":"Berenji, H.: Fuzzy q-learning: a new approach for fuzzy dynamic programming. In: Proceedings of 1994 IEEE 3rd International Fuzzy Systems Conference, vol. 1, pp. 486\u2013491 (1994). https:\/\/doi.org\/10.1109\/FUZZY.1994.343737","DOI":"10.1109\/FUZZY.1994.343737"},{"issue":"5","key":"39_CR2","doi-asserted-by":"publisher","first-page":"408","DOI":"10.1016\/j.tics.2019.02.006","volume":"23","author":"M Botvinick","year":"2019","unstructured":"Botvinick, M., Ritter, S., Wang, J.X., Kurth-Nelson, Z., Blundell, C., Hassabis, D.: Reinforcement learning, fast and slow. Trends Cogn. Sci. 23(5), 408\u2013422 (2019)","journal-title":"Trends Cogn. Sci."},{"key":"39_CR3","unstructured":"Brockman, G., et al.: Openai gym. arXiv preprint arXiv:1606.01540 (2016)"},{"key":"39_CR4","doi-asserted-by":"crossref","unstructured":"Cooper, M.G., Vidal, J.J.: Genetic design of fuzzy controllers: the cart and jointed-pole problem. In: Proceedings of 1994 IEEE 3rd International Fuzzy Systems Conference, pp. 1332\u20131337. IEEE (1994)","DOI":"10.1109\/FUZZY.1994.343619"},{"key":"39_CR5","unstructured":"Duan, Y., et al.: One-shot imitation learning. In: Guyon, I., et al. (eds.) Advances in Neural Information Processing Systems, vol. 30, pp. 1087\u20131098. Curran Associates, Inc. (2017). http:\/\/papers.nips.cc\/paper\/6709-one-shot-imitation-learning.pdf"},{"key":"39_CR6","unstructured":"Geist, M., Piot, B., Pietquin, O.: Is the bellman residual a bad proxy? In: Advances in Neural Information Processing Systems, pp. 3205\u20133214 (2017)"},{"key":"39_CR7","doi-asserted-by":"publisher","unstructured":"Glorennec, P., Jouffe, L.: Fuzzy q-learning. In: Proceedings of 6th International Fuzzy Systems Conference, vol. 2, pp. 659\u2013662 (1997). https:\/\/doi.org\/10.1109\/FUZZY.1997.622790","DOI":"10.1109\/FUZZY.1997.622790"},{"key":"39_CR8","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. arXiv preprint arXiv:1801.01290 (2018)"},{"key":"39_CR9","doi-asserted-by":"crossref","unstructured":"Henderson, P., Islam, R., Bachman, P., Pineau, J., Precup, D., Meger, D.: Deep reinforcement learning that matters (2019)","DOI":"10.1609\/aaai.v32i1.11694"},{"issue":"2","key":"39_CR10","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3054912","volume":"50","author":"A Hussein","year":"2017","unstructured":"Hussein, A., Gaber, M.M., Elyan, E., Jayne, C.: Imitation learning: a survey of learning methods. ACM Comput. Surv. 50(2), 1\u201335 (2017). https:\/\/doi.org\/10.1145\/3054912","journal-title":"ACM Comput. Surv."},{"key":"39_CR11","doi-asserted-by":"crossref","unstructured":"Jouffe, L.: Actor-critic learning based on fuzzy inference system. In: 1996 IEEE International Conference on Systems, Man and Cybernetics. Information Intelligence and Systems (Cat. No. 96CH35929), vol. 1, pp. 339\u2013344. IEEE (1996)","DOI":"10.1109\/ICSMC.1996.569792"},{"issue":"3","key":"39_CR12","doi-asserted-by":"publisher","first-page":"338","DOI":"10.1109\/5326.704563","volume":"28","author":"L Jouffe","year":"1998","unstructured":"Jouffe, L.: Fuzzy inference system learning by reinforcement methods. IEEE Trans. Syst. Man Cybern.-Part C: Appl. Rev. 28(3), 338\u2013355 (1998)","journal-title":"IEEE Trans. Syst. Man Cybern.-Part C: Appl. Rev."},{"key":"39_CR13","unstructured":"Konda, V.R., Tsitsiklis, J.N.: Actor-critic algorithms. In: Advances in Neural Information Processing Systems, pp. 1008\u20131014. Citeseer (2000)"},{"key":"39_CR14","unstructured":"Kool, W., Van Hoof, H., Welling, M.: Attention, learn to solve routing problems! arXiv preprint arXiv:1803.08475 (2018)"},{"key":"39_CR15","doi-asserted-by":"crossref","unstructured":"Li, G., Mueller, M., Casser, V., Smith, N., Michels, D.L., Ghanem, B.: Oil: observational imitation learning. arXiv preprint arXiv:1803.01129 (2018)","DOI":"10.15607\/RSS.2019.XV.005"},{"key":"39_CR16","unstructured":"Mnih, V., et al.: Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)"},{"issue":"7540","key":"39_CR17","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"39_CR18","unstructured":"Paszke, A., et al.: Pytorch: an imperative style, high-performance deep learning library. In: Wallach, H., Larochelle, H., Beygelzimer, A., dAlch\u00e9-Buc, A., Fox, E., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 32, pp. 8024\u20138035. Curran Associates, Inc. (2019). http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf"},{"key":"39_CR19","doi-asserted-by":"publisher","DOI":"10.1002\/9780470168967","volume-title":"Fuzzy Systems Engineering: Toward Human-Centric Computing","author":"W Pedrycz","year":"2007","unstructured":"Pedrycz, W., Gomide, F.: Fuzzy Systems Engineering: Toward Human-Centric Computing. Wiley, Hoboken (2007)"},{"issue":"7587","key":"39_CR20","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"key":"39_CR21","unstructured":"Silver, D., Lever, G., Heess, N., Degris, T., Wierstra, D., Riedmiller, M.: Deterministic policy gradient algorithms (2014)"},{"key":"39_CR22","volume-title":"Reinforcement Learning: An Introduction","author":"R Sutton","year":"2018","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (2018)"},{"key":"39_CR23","unstructured":"Sutton, R., McAllester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: Advances in Neural Information Processing Systems, pp. 1057\u20131063 (2000)"},{"issue":"18","key":"39_CR24","doi-asserted-by":"publisher","first-page":"3764","DOI":"10.1016\/j.ins.2007.03.012","volume":"177","author":"XS Wang","year":"2007","unstructured":"Wang, X.S., Cheng, Y.H., Yi, J.Q.: A fuzzy actor-critic reinforcement learning network. Inf. Sci. 177(18), 3764\u20133781 (2007)","journal-title":"Inf. Sci."},{"key":"39_CR25","unstructured":"Wu, Y.H., Charoenphakdee, N., Bao, H., Tangkaratt, V., Sugiyama, M.: Imitation learning from imperfect demonstration. arXiv preprint arXiv:1901.09387 (2019)"},{"key":"39_CR26","doi-asserted-by":"crossref","unstructured":"Zhang, P., et al.: Kogun: accelerating deep deinforcement learning via integrating human suboptimal knowledge. arXiv preprint arXiv:2002.07418 (2020)","DOI":"10.24963\/ijcai.2020\/317"}],"container-title":["Lecture Notes in Networks and Systems","Explainable AI and Other Applications of Fuzzy Techniques"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-82099-2_39","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,14]],"date-time":"2024-09-14T18:31:05Z","timestamp":1726338665000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-82099-2_39"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,28]]},"ISBN":["9783030820985","9783030820992"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-82099-2_39","relation":{},"ISSN":["2367-3370","2367-3389"],"issn-type":[{"type":"print","value":"2367-3370"},{"type":"electronic","value":"2367-3389"}],"subject":[],"published":{"date-parts":[[2021,7,28]]},"assertion":[{"value":"28 July 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NAFIPS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"North American Fuzzy Information Processing Society Annual Conference","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"West Lafayette, IN","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 June 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 June 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nafips2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/polytechnic.purdue.edu\/nafips2021","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}