{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T19:58:01Z","timestamp":1766087881741,"version":"3.28.0"},"reference-count":22,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,5,4]],"date-time":"2022-05-04T00:00:00Z","timestamp":1651622400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,4]],"date-time":"2022-05-04T00:00:00Z","timestamp":1651622400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,5,4]]},"DOI":"10.1109\/cscwd54268.2022.9776133","type":"proceedings-article","created":{"date-parts":[[2022,5,20]],"date-time":"2022-05-20T19:39:11Z","timestamp":1653075551000},"page":"299-304","source":"Crossref","is-referenced-by-count":1,"title":["Get A Sense of Accomplishment in Doing Exercises: A Reinforcement Learning Perspective"],"prefix":"10.1109","author":[{"given":"Songdeng","family":"Niu","sequence":"first","affiliation":[{"name":"University of Electronic Science and Technology of China,School of Computer Science and Engineering, Institute for Cyber Security,China"}]},{"given":"Sheng","family":"Cao","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China,School of Computer Science and Engineering, Institute for Cyber Security,China"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/2930238.2930247"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.02.008"},{"key":"ref12","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","author":"ng","year":"1999","journal-title":"Proceedings of the 16th International Conference on Machine Learning"},{"key":"ref13","first-page":"433","article-title":"Dynamic potential-based reward shaping","author":"devlin","year":"2012","journal-title":"Proc of International Conference on Autonomous Agents and Multiagent Systems"},{"key":"ref14","first-page":"2652","article-title":"Expressing arbitrary reward functions as potential-based advice","author":"harutyuyan","year":"2015","journal-title":"Proceedings of the 29th AAAI Conference on Artificial Intelligence"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s40593-019-00180-4"},{"article-title":"The vocabulary size test","year":"2012","author":"nation","key":"ref16"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"88","DOI":"10.1080\/00031305.1994.10476030","article-title":"The three sigma rule","volume":"48","author":"pukelsheim","year":"1994","journal-title":"The American Statistician"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.3389\/fnbot.2020.565702"},{"article-title":"How to assign partial credit on an exam of true-false exercises?","year":"2016","author":"tao","key":"ref19"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TLT.2017.2692761"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s40593-019-00187-x"},{"key":"ref6","article-title":"A decision structure for teaching machines","author":"smallwood","year":"1962","journal-title":"PhD thesis"},{"key":"ref5","first-page":"112","article-title":"Towards closing the loop: bridging machine-induced pedagogical policies to learning theories","author":"zhou","year":"2017","journal-title":"Proceedings of 10th International Conference on Data Mining"},{"key":"ref8","first-page":"472","article-title":"Using inverse planning for personalized feedback","author":"rafferty","year":"2016","journal-title":"Proceedings of the 9th international conference on educational data mining"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-69132-7_41"},{"key":"ref2","first-page":"223","article-title":"Making learning fun: a taxonomy of intrinsic motivations for learning","author":"malone","year":"1987","journal-title":"Aptitude Learning and Instruction Vol 3 Conative and Affective Process Analyses"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compedu.2020.104079"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/s11257-010-9093-1"},{"key":"ref20","first-page":"5251","article-title":"Nonlinear distributional gradient Temporal-Difference learning","author":"qu","year":"2019","journal-title":"Proceedings of the 36th International Conference on Machine Learning California"},{"key":"ref22","first-page":"792","article-title":"Principled methods for advising reinforcement learning agents","author":"wiewiora","year":"2003","journal-title":"Proceedings of the 20th International Conference on Machine Learning"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1515\/9781400835386"}],"event":{"name":"2022 IEEE 25th International Conference on Computer Supported Cooperative Work in Design (CSCWD)","start":{"date-parts":[[2022,5,4]]},"location":"Hangzhou, China","end":{"date-parts":[[2022,5,6]]}},"container-title":["2022 IEEE 25th International Conference on Computer Supported Cooperative Work in Design (CSCWD)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9775971\/9776016\/09776133.pdf?arnumber=9776133","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,25]],"date-time":"2024-09-25T17:38:50Z","timestamp":1727285930000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9776133\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,4]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/cscwd54268.2022.9776133","relation":{},"subject":[],"published":{"date-parts":[[2022,5,4]]}}}