{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,22]],"date-time":"2025-12-22T14:49:33Z","timestamp":1766414973789,"version":"3.37.3"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2019,4,29]],"date-time":"2019-04-29T00:00:00Z","timestamp":1556496000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Fuzzy Syst."],"published-print":{"date-parts":[[2019,7]]},"DOI":"10.1007\/s40815-019-00633-x","type":"journal-article","created":{"date-parts":[[2019,4,29]],"date-time":"2019-04-29T19:26:34Z","timestamp":1556565994000},"page":"1443-1454","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Adaptive Fuzzy Watkins: A New Adaptive Approach for Eligibility Traces in Reinforcement Learning"],"prefix":"10.1007","volume":"21","author":[{"given":"Matin","family":"Shokri","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2227-4507","authenticated-orcid":false,"given":"Seyed Hossein","family":"Khasteh","sequence":"additional","affiliation":[]},{"given":"Amin","family":"Aminifar","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,4,29]]},"reference":[{"key":"633_CR1","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (2018)"},{"issue":"1","key":"633_CR2","first-page":"5057","volume":"17","author":"H Seijen Van","year":"2016","unstructured":"Van Seijen, H., Mahmood, A.R., Pilarski, P.M., Machado, M.C., Sutton, R.S.: True online temporal-difference learning. J. Mach. Learn. Res. 17(1), 5057\u20135096 (2016)","journal-title":"J. Mach. Learn. Res."},{"issue":"2\u20133","key":"633_CR3","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1023\/A:1017936530646","volume":"49","author":"JA Boyan","year":"2002","unstructured":"Boyan, J.A.: Technical update: least-squares temporal difference learning. Mach. Learn. 49(2\u20133), 233\u2013246 (2002)","journal-title":"Mach. Learn."},{"issue":"2","key":"633_CR4","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1007\/s10626-006-8134-8","volume":"16","author":"D Choi","year":"2006","unstructured":"Choi, D., Van Roy, B.: A generalized kalman filter for fixed point approximation and efficient temporal-difference learning. Discrete Event Dyn. Syst. 16(2), 207\u2013239 (2006)","journal-title":"Discrete Event Dyn. Syst."},{"issue":"7","key":"633_CR5","doi-asserted-by":"publisher","first-page":"1515","DOI":"10.1109\/TAC.2009.2022097","volume":"54","author":"H Yu","year":"2009","unstructured":"Yu, H., Bertsekas, D.P.: Convergence results for some temporal difference methods based on least squares. IEEE Trans. Autom. Control 54(7), 1515\u20131531 (2009)","journal-title":"IEEE Trans. Autom. Control"},{"key":"633_CR6","unstructured":"Maei, H.R., Szepesv\u00e1ri, C., Bhatnagar, S., Sutton, R.S.: Toward off-policy learning control with function approximation. In: ICML, pp. 719\u2013726 (2010)"},{"key":"633_CR7","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Maei, H.R., Precup, D., Bhatnagar, S., Silver, D., Szepesv\u00e1ri, C., Wiewiora, E.: Fast gradient-descent methods for temporal-difference learning with linear function approximation, In: Proceedings of the 26th Annual International Conference on Machine Learning, 993\u20131000. ACM (2009)","DOI":"10.1145\/1553374.1553501"},{"key":"633_CR8","doi-asserted-by":"crossref","unstructured":"Maei, H.R., Sutton, R.S.: Gq (\n                    \n                      \n                    \n                    $$\\lambda$$\n                    \n                      \n                        \u03bb\n                      \n                    \n                  ): a general gradient algorithm for temporal-difference prediction learning with eligibility traces. In: Proceedings of the Third Conference on Artificial General Intelligence, vol. 1, pp. 91\u201396 (2010)","DOI":"10.2991\/agi.2010.22"},{"issue":"1","key":"633_CR9","first-page":"289","volume":"15","author":"M Geist","year":"2014","unstructured":"Geist, M., Scherrer, B.: Off-policy learning with eligibility traces: a survey. J. Mach. Learn. Res. 15(1), 289\u2013333 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"633_CR10","unstructured":"Gehring, C., Pan, Y., White, M.: Incremental truncated lstd, arXiv preprint \n                    arXiv:1511.08495\n                    \n                   (2015)"},{"key":"633_CR11","unstructured":"Pan, Y., White, A.M., White, M.: Accelerated gradient temporal difference learning. In: AAAI, 2464\u20132470 (2017)"},{"key":"633_CR12","unstructured":"Devraj, A.M., Meyn, S.P.: Fastest convergence for q-learning, arXiv preprint \n                    arXiv:1707.03770\n                    \n                   (2017)"},{"key":"633_CR13","doi-asserted-by":"crossref","unstructured":"Chen, S.-L., Wei, Y.-M.: Least-squares sarsa (lambda) algorithms for reinforcement learning, In: Natural Computation, 2008. ICNC\u201908. Fourth International Conference on, vol. 2, pp. 632\u2013636, IEEE (2008)","DOI":"10.1109\/ICNC.2008.694"},{"key":"633_CR14","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling, L.P., Littman, M.L., Moore, A.W.: Reinforcement learning: a survey. J. Artif. Intell. Res. 4, 237\u2013285 (1996)","journal-title":"J. Artif. Intell. Res."},{"key":"633_CR15","volume-title":"Algorithms and Representations for Reinforcement Learning","author":"Y Engel","year":"2005","unstructured":"Engel, Y.: Algorithms and Representations for Reinforcement Learning. Hebrew University of Jerusalem, Jerusalem (2005)"},{"key":"633_CR16","volume-title":"Survey Reinforcement Learning","author":"V Dolk","year":"2010","unstructured":"Dolk, V.: Survey Reinforcement Learning. Eindhoven University of Technology, Eindhoven (2010)"},{"key":"633_CR17","doi-asserted-by":"crossref","unstructured":"Glorennec, P.Y., Jouffe, L.: Fuzzy q-learning. In: Proceedings of 6th International Fuzzy Systems Conference, vol. 2, 659\u2013662 (1997)","DOI":"10.1109\/FUZZY.1997.622790"},{"issue":"3","key":"633_CR18","doi-asserted-by":"publisher","first-page":"1478","DOI":"10.1109\/TSMCB.2004.825938","volume":"34","author":"MJ Er","year":"2004","unstructured":"Er, M.J., Deng, C.: Online tuning of fuzzy inference systems using dynamic fuzzy q-learning. IEEE Trans. Syst. Man Cybern. Part B (Cybernetics) 34(3), 1478\u20131489 (2004)","journal-title":"IEEE Trans. Syst. Man Cybern. Part B (Cybernetics)"},{"key":"633_CR19","doi-asserted-by":"crossref","unstructured":"Bu\u015foniu, L., Ernst, D., De\u00a0Schutter, B., Babu\u0161ka, R.: Continuous-state reinforcement learning with fuzzy approximation, In: Adaptive Agents and Multi-Agent Systems III. Adaptation and Multi-Agent Learning, pp.\u00a027\u201343, Springer, London (2008)","DOI":"10.1007\/978-3-540-77949-0_3"},{"issue":"10","key":"633_CR20","doi-asserted-by":"publisher","first-page":"1420","DOI":"10.1016\/j.fss.2008.11.026","volume":"160","author":"A Bonarini","year":"2009","unstructured":"Bonarini, A., Lazaric, A., Montrone, F., Restelli, M.: Reinforcement distribution in fuzzy q-learning. Fuzzy Sets Syst. 160(10), 1420\u20131443 (2009)","journal-title":"Fuzzy Sets Syst."},{"key":"633_CR21","doi-asserted-by":"crossref","unstructured":"Zajdel, R.: Fuzzy q(\n                    \n                      \n                    \n                    $$\\lambda$$\n                    \n                      \n                        \u03bb\n                      \n                    \n                  )-learning algorithm. In: Rutkowski, L., Scherer, R., Tadeusiewicz, R., Zadeh, L.A., Zurada, J.M. (eds) Artificial Intelligence and Soft Computing. (Berlin, Heidelberg), pp.\u00a0256\u2013263, Springer, Berlin (2010)","DOI":"10.1007\/978-3-642-13208-7_33"},{"key":"633_CR22","unstructured":"Watkins, C.J.C.H.: Learning from delayed rewards. Ph.D thesis, King\u2019s College, Cambridge (1989)"},{"key":"633_CR23","doi-asserted-by":"crossref","unstructured":"Peng, J., Williams, R.J.: Incremental multi-step q-learning, In: Machine Learning Proceedings 1994, 226\u2013232. Elsevier, Amsterdam (1994)","DOI":"10.1016\/B978-1-55860-335-6.50035-0"},{"key":"633_CR24","volume-title":"Reinforcement Learning","author":"R Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning. MIT Press, Cambridge (1998)"},{"issue":"1,2","key":"633_CR25","doi-asserted-by":"crossref","first-page":"73","DOI":"10.3233\/IFS-2009-0416","volume":"20","author":"J Leng","year":"2009","unstructured":"Leng, J., Fyfe, C., Jain, L.C.: Experimental analysis on sarsa (\n                    \n                      \n                    \n                    $$\\lambda$$\n                    \n                      \n                        \u03bb\n                      \n                    \n                  ) and q (\n                    \n                      \n                    \n                    $$\\lambda$$\n                    \n                      \n                        \u03bb\n                      \n                    \n                  ) with different eligibility traces strategies. J. Intell. Fuzzy Syst. 20(1,2), 73\u201382 (2009)","journal-title":"J. Intell. Fuzzy Syst."},{"key":"633_CR26","first-page":"1","volume":"5","author":"E Even-Dar","year":"2003","unstructured":"Even-Dar, E., Mansour, Y.: Learning rates for q-learning. J. Mach. Learn. Res. 5, 1\u201325 (2003). no.\u00a0Dec","journal-title":"J. Mach. Learn. Res."},{"issue":"01","key":"633_CR27","doi-asserted-by":"publisher","first-page":"578","DOI":"10.20965\/jaciii.2006.p0578","volume":"10","author":"H Tizhoosh","year":"2006","unstructured":"Tizhoosh, H.: Opposition-based reinforcement learning. JACIII 10(01), 578\u2013585 (2006)","journal-title":"JACIII"},{"key":"633_CR28","unstructured":"Azar, M.G., Munos, R., Ghavamzadeh, M., Kappen, H.: Speedy q-learning, In: Advances in Neural Information Processing Systems (2011)"},{"key":"633_CR29","unstructured":"Devraj, A.M., Meyn, S.: Zap q-learning, In: Advances in Neural Information Processing Systems, 2235\u20132244 (2017)"},{"key":"633_CR30","volume-title":"A Couse in Fuzzy Systems and Control","author":"L Wang","year":"1997","unstructured":"Wang, L.: A Couse in Fuzzy Systems and Control. Prentice-Hall, London (1997)"},{"issue":"3","key":"633_CR31","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1109\/TITS.2005.853698","volume":"6","author":"X Dai","year":"2005","unstructured":"Dai, X., Li, C.-K., Rad, A.B.: An approach to tune fuzzy controllers based on reinforcement learning for autonomous vehicle control. IEEE Trans. Intell. Transp. Syst. 6(3), 285\u2013293 (2005)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"633_CR32","unstructured":"Schneider, T.D.: Information theory primer with an appendix on logarithms. In: National Cancer Institute, Citeseer (2007)"},{"key":"633_CR33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-20347-3","volume-title":"Fundamentals in Information Theory and Coding","author":"M Borda","year":"2011","unstructured":"Borda, M.: Fundamentals in Information Theory and Coding. Springer, Berlin (2011)"}],"container-title":["International Journal of Fuzzy Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s40815-019-00633-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s40815-019-00633-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s40815-019-00633-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,27]],"date-time":"2020-04-27T23:23:30Z","timestamp":1588029810000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s40815-019-00633-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,4,29]]},"references-count":33,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2019,7]]}},"alternative-id":["633"],"URL":"https:\/\/doi.org\/10.1007\/s40815-019-00633-x","relation":{},"ISSN":["1562-2479","2199-3211"],"issn-type":[{"type":"print","value":"1562-2479"},{"type":"electronic","value":"2199-3211"}],"subject":[],"published":{"date-parts":[[2019,4,29]]},"assertion":[{"value":"10 July 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 December 2018","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 March 2019","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 April 2019","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with Ethical Standards"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}