{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T23:44:10Z","timestamp":1767138250144,"version":"build-2238731810"},"publisher-location":"Cham","reference-count":14,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030033347","type":"print"},{"value":"9783030033354","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-03335-4_41","type":"book-chapter","created":{"date-parts":[[2018,11,1]],"date-time":"2018-11-01T08:14:02Z","timestamp":1541060042000},"page":"469-478","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Applying Online Expert Supervision in Deep Actor-Critic Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Jin","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiansheng","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yiqing","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weitao","family":"Wan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianpeng","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,11,2]]},"reference":[{"key":"41_CR1","unstructured":"Hessel, M., et al.: Rainbow: combining improvements in deep reinforcement learning. arXiv preprint arXiv:1710.02298 (2017)"},{"issue":"7676","key":"41_CR2","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver, D., et al.: Mastering the game of go without human knowledge. Nature 550(7676), 354 (2017)","journal-title":"Nature"},{"key":"41_CR3","unstructured":"Duan, Y., Chen, X., Houthooft, R., Schulman, J., Abbeel, P.: Benchmarking deep reinforcement learning for continuous control. In: International Conference on Machine Learning, pp. 1329\u20131338 (2016)"},{"key":"41_CR4","unstructured":"Hester, T., et al.: Deep Q-learning from Demonstrations. arXiv preprint arXiv:1704.03732 (2017)"},{"key":"41_CR5","unstructured":"Cruz, J., Gabriel, V., Du, Y., Taylor, M.E.: Pre-training neural networks with human demonstrations for deep reinforcement learning. arXiv preprint arXiv:1709.04083 (2017)"},{"key":"41_CR6","unstructured":"Zhang, X., Ma, H.: Pretraining deep actor-critic reinforcement learning algorithms with expert demonstrations. arXiv preprint arXiv:1801.10459 (2018)"},{"key":"41_CR7","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1928\u20131937 (2016)"},{"key":"41_CR8","unstructured":"Vinyals, O., et al.: StarCraft II: a new challenge for reinforcement learning. arXiv preprint arXiv:1708.04782 (2017)"},{"key":"41_CR9","doi-asserted-by":"crossref","unstructured":"Bengio, Y., Collobert, R., Weston, J.: Curriculum learning. In: Proceedings of the 26th Annual International Conference on Machine Learning, vol. 60, no. 1, pp. 41\u201348 (2009)","DOI":"10.1145\/1553374.1553380"},{"key":"41_CR10","unstructured":"Wu, Y., Tian, Y.: Training agent for first-person shooter game with actor-critic curriculum learning. In: 5th International Conference on Learning Representations (2016)"},{"issue":"5","key":"41_CR11","doi-asserted-by":"publisher","first-page":"1054","DOI":"10.1109\/TNN.1998.712192","volume":"9","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: an introduction. IEEE Trans. Neural Networks 9(5), 1054 (1998)","journal-title":"IEEE Trans. Neural Networks"},{"key":"41_CR12","doi-asserted-by":"crossref","unstructured":"Henderson, P., Islam, R., Bachman, P., Pineau, J., Precup, D., Meger, D.: Deep reinforcement learning that matters. arXiv preprint arXiv:1709.06560 (2017)","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"41_CR13","volume-title":"Introduction to Reinforcement Learning","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Introduction to Reinforcement Learning, vol. 135. MIT Press, Cambridge (1998)"},{"issue":"7540","key":"41_CR14","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529 (2015)","journal-title":"Nature"}],"updated-by":[{"DOI":"10.1007\/978-3-030-03335-4_45","type":"correction","label":"Correction","source":"publisher","updated":{"date-parts":[[2018,12,28]],"date-time":"2018-12-28T00:00:00Z","timestamp":1545955200000}}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-03335-4_41","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,31]],"date-time":"2023-10-31T21:09:11Z","timestamp":1698786551000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-03335-4_41"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030033347","9783030033354"],"references-count":14,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-03335-4_41","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"2 November 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"28 December 2018","order":2,"name":"change_date","label":"Change Date","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Correction","order":3,"name":"change_type","label":"Change Type","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The original version of the chapter \u201cApplying Online Expert Supervision in Deep Actor-Critic Reinforcement Learning\u201d starting on p. 469 has been revised.","order":4,"name":"change_details","label":"Change Details","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Guangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 November 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 November 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/prcv.qyhw.net.cn\/?lang=en&meeting_id=255","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}