{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T12:27:05Z","timestamp":1730204825192,"version":"3.28.0"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,12,6]],"date-time":"2022-12-06T00:00:00Z","timestamp":1670284800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,6]],"date-time":"2022-12-06T00:00:00Z","timestamp":1670284800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,12,6]]},"DOI":"10.1109\/cdc51059.2022.9992858","type":"proceedings-article","created":{"date-parts":[[2023,1,10]],"date-time":"2023-01-10T19:26:56Z","timestamp":1673378816000},"page":"3456-3461","source":"Crossref","is-referenced-by-count":2,"title":["A Teacher-Student Markov Decision Process-based Framework for Online Correctional Learning"],"prefix":"10.1109","author":[{"given":"Ines","family":"Lourenco","sequence":"first","affiliation":[{"name":"KTH Royal Institute of Technology,Division of Decision and Control Systems,Stockholm,Sweden"}]},{"given":"Rebecka","family":"Winqvist","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Division of Decision and Control Systems,Stockholm,Sweden"}]},{"given":"Cristian R.","family":"Rojas","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Division of Decision and Control Systems,Stockholm,Sweden"}]},{"given":"Bo","family":"Wahlberg","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Division of Decision and Control Systems,Stockholm,Sweden"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-019-48995-4"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248074"},{"journal-title":"OED Online.","article-title":"learn, v","year":"2021","key":"ref3"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2008.10.024"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3054912"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2021.08.328"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-247-2.50037-1"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2013.32"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2007.05.016"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.3166\/ejc.15.275-310"},{"key":"ref11","first-page":"599","article-title":"Active learning: A survey","volume-title":"Data Classification.","author":"Aggarwal"},{"article-title":"Counterfactual explanations for machine learning: A review","year":"2020","author":"Verma","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2003.07.005"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1002\/0471200611"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.arcontrol.2019.03.002"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/1541880.1541882"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781316471104"},{"article-title":"A teacherstudent framework for online correctional learning","year":"2021","author":"Louren\u00e7o","key":"ref18"},{"volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming.","year":"2014","author":"Puterman","key":"ref19"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.3389\/fnsys.2021.644059"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2019.10.014"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1088\/0266-5611\/25\/12\/123014"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/tcds.2021.3120301"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICDL-EpiRob48136.2020.9278033"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1162\/neco.2008.11-07-654"},{"key":"ref26","article-title":"Differentiable convex optimization layers","volume":"32","author":"Agrawal","year":"2019","journal-title":"Advances in Neural Information Processing Systems (NEURIPS)"}],"event":{"name":"2022 IEEE 61st Conference on Decision and Control (CDC)","start":{"date-parts":[[2022,12,6]]},"location":"Cancun, Mexico","end":{"date-parts":[[2022,12,9]]}},"container-title":["2022 IEEE 61st Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9992315\/9992317\/09992858.pdf?arnumber=9992858","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T11:52:05Z","timestamp":1706788325000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9992858\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,6]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/cdc51059.2022.9992858","relation":{},"subject":[],"published":{"date-parts":[[2022,12,6]]}}}