{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,4,1]],"date-time":"2022-04-01T05:06:14Z","timestamp":1648789574720},"reference-count":23,"publisher":"Informa UK Limited","issue":"3","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Journal of Experimental &amp; Theoretical Artificial Intelligence"],"published-print":{"date-parts":[[2001,7]]},"DOI":"10.1080\/09528130110063100","type":"journal-article","created":{"date-parts":[[2002,7,26]],"date-time":"2002-07-26T15:23:56Z","timestamp":1027697036000},"page":"241-270","source":"Crossref","is-referenced-by-count":1,"title":["Q-Learning: computation of optimal Q-values for evaluating the learning level in robotic tasks"],"prefix":"10.1080","volume":"13","author":[{"given":"Tiziana","family":"D'Orazio","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Grazia","family":"Cicirelli","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"301","reference":[{"key":"CIT0001","first-page":"279","volume":"23","author":"Asada M.","year":"1996","journal-title":"Machine Learning"},{"key":"CIT0002","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/TSMC.1983.6313077","volume":"13","author":"Barto A.","year":"1983","journal-title":"IEEE Transactions on System Man and Cybernetics"},{"key":"CIT0003","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1098-111X(199710)12:10<695::AID-INT1>3.0.CO;2-T"},{"key":"CIT0004","unstructured":"Cicirelli, G., D'Orazio, T., Capozzo, L. and Distante, A. tearhing elementary behaviors with khepera robot. In Proceedings of the first International Khepera workshop. Paderborn, Germany. pp.109\u2013118."},{"key":"CIT0005","doi-asserted-by":"crossref","unstructured":"Cicirelli, G., D'Orazio, T., Distante, C. and Attolico, G. Learning actions from vision-based positioning in goal-directed navigation. In Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems. Victoria, Canada. pp.1715\u20131720.","DOI":"10.1109\/IROS.1998.724845"},{"key":"CIT0006","doi-asserted-by":"publisher","DOI":"10.1109\/37.257890"},{"key":"CIT0007","doi-asserted-by":"publisher","DOI":"10.1016\/0921-8890(95)00009-5"},{"key":"CIT0008","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"Kaebling L.","year":"1996","journal-title":"Journal of Artifical Intelligence Research"},{"key":"CIT0009","unstructured":"Kearns, M. and Singh, S. Finite-sample convergence rates for q-learning and indirect algorithms. In Proceedings of Neural Information Processing Systems. Denver, Colorado. pp.996\u20131002."},{"key":"CIT0010","unstructured":"Littman, M. L. and Szepesvari, C. A generalized reinforcement learning model: Convergence and applications. In Proceedings of International Conference of Machine Learning. Bari, Italy. pp.310\u2013318."},{"key":"CIT0011","doi-asserted-by":"publisher","DOI":"10.1016\/0921-8890(95)00021-7"},{"key":"CIT0012","doi-asserted-by":"publisher","DOI":"10.1109\/3477.499792"},{"key":"CIT0013","doi-asserted-by":"crossref","unstructured":"Minato, T. and Asada, M. Environmental change adaptation for mobile robot navigation. In Proceedings of International Conference on Intelligent Robots and Systems. Victoria, Canada. pp.1859\u20131864.","DOI":"10.1109\/IROS.1998.724867"},{"key":"CIT0014","doi-asserted-by":"crossref","unstructured":"Mitchell, T. 1997.Machine LearningMcGraw Hill","DOI":"10.1111\/j.1600-0749.1997.tb00478.x"},{"key":"CIT0015","doi-asserted-by":"publisher","DOI":"10.1177\/105971239700500307"},{"key":"CIT0016","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007678930559"},{"key":"CIT0017","volume-title":"Reinforcement Learning","author":"Sutton R. S.","year":"1998"},{"key":"CIT0018","unstructured":"Szepesvari, C. The asymptotic convergence-rate of q-learning. In Proceedings of Neural Information Processing Systems. Denver, Colorado. pp.1064\u20131070."},{"key":"CIT0019","first-page":"185","volume":"16","author":"Tsitsiklis J.","year":"1994","journal-title":"Machine Learning"},{"key":"CIT0020","doi-asserted-by":"crossref","unstructured":"Uchibe, E., Asada, M. and Hosoda, K. Behavior coordination for a mobile robot using modular reinforcement learning. In Proceedings of International Conference on Intelligent Robots and Systems. pp.1329\u20131336.","DOI":"10.1109\/IROS.1996.568989"},{"key":"CIT0021","volume-title":"Learning from Delayed Rewards","author":"Watkins C. J.","year":"1989"},{"key":"CIT0022","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022676722315"},{"key":"CIT0023","doi-asserted-by":"crossref","unstructured":"Yamaguchi, T., Masubuchi, M., Fujihara, K. and Yachida, M. Realtime reinforcement learning for a real robot in the real environment. In Proceedings of the 1996 IROS Conference. Osalca, Japan. pp.1321\u20131327.","DOI":"10.1109\/IROS.1996.568988"}],"container-title":["Journal of Experimental &amp; Theoretical Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/09528130110063100","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,3,3]],"date-time":"2020-03-03T21:19:28Z","timestamp":1583270368000},"score":1,"resource":{"primary":{"URL":"http:\/\/www.tandfonline.com\/doi\/abs\/10.1080\/09528130110063100"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2001,7]]},"references-count":23,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2001,7]]}},"alternative-id":["10.1080\/09528130110063100"],"URL":"https:\/\/doi.org\/10.1080\/09528130110063100","relation":{},"ISSN":["0952-813X","1362-3079"],"issn-type":[{"value":"0952-813X","type":"print"},{"value":"1362-3079","type":"electronic"}],"subject":[],"published":{"date-parts":[[2001,7]]}}}