{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T11:43:57Z","timestamp":1743075837801,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":19,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819772438"},{"type":"electronic","value":"9789819772445"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-7244-5_21","type":"book-chapter","created":{"date-parts":[[2024,8,27]],"date-time":"2024-08-27T17:02:38Z","timestamp":1724778158000},"page":"318-329","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Reinforcement Learning from Clip"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-7955-9997","authenticated-orcid":false,"given":"Shaoqiang","family":"Zhu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kejia","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haiwei","family":"Pan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,8,28]]},"reference":[{"key":"21_CR1","unstructured":"Bojarski, M., et\u00a0al.: End to end learning for self-driving cars. arXiv preprint arXiv:1604.07316 (2016)"},{"key":"21_CR2","doi-asserted-by":"crossref","unstructured":"Buffet, O., Pietquin, O., Weng, P.: Reinforcement learning (2020)","DOI":"10.1007\/978-3-030-06164-7_12"},{"key":"21_CR3","doi-asserted-by":"crossref","unstructured":"Degris, T., Pilarski, P.M., Sutton, R.S.: Model-free reinforcement learning with continuous action in practice. In: 2012 American Control Conference (ACC), pp. 2177\u20132182. IEEE (2012)","DOI":"10.1109\/ACC.2012.6315022"},{"key":"21_CR4","unstructured":"Fu, J., Kumar, A., Nachum, O., Tucker, G., Levine, S.: D4rl: datasets for deep data-driven reinforcement learning (2021)"},{"key":"21_CR5","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor (2018)"},{"key":"21_CR6","unstructured":"Hasselt, H.: Double q-learning. Adv. Neural Inf. Process. Syst. 23 (2010)"},{"key":"21_CR7","unstructured":"Hill, A., et al.: Stable baselines (2018). https:\/\/github.com\/hill-a\/stable-baselines"},{"key":"21_CR8","doi-asserted-by":"crossref","unstructured":"Li, G., He, B., Gomez, R., Nakamura, K.: Interactive reinforcement learning from demonstration and human evaluative feedback. In: 2018 27th IEEE International Symposium on Robot and Human Interactive Communication (RO-MAN), pp. 1156\u20131162. IEEE (2018)","DOI":"10.1109\/ROMAN.2018.8525837"},{"key":"21_CR9","unstructured":"Mnih, V., et al.: Playing atari with deep reinforcement learning (2013)"},{"issue":"7540","key":"21_CR10","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"21_CR11","unstructured":"Ng, A.Y., Russell, S., et\u00a0al.: Algorithms for inverse reinforcement learning. In: ICML (2000)"},{"issue":"6","key":"21_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3272127.3275014","volume":"37","author":"XB Peng","year":"2018","unstructured":"Peng, X.B., Kanazawa, A., Malik, J., Abbeel, P., Levine, S.: SFV: reinforcement learning of physical skills from videos. ACM Trans. Graph. (TOG) 37(6), 1\u201314 (2018)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"21_CR13","unstructured":"Schulman, J., Levine, S., Moritz, P., Jordan, M.I., Abbeel, P.: Trust region policy optimization (2017)"},{"key":"21_CR14","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms (2017)"},{"issue":"7587","key":"21_CR15","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"DN Silver","year":"2016","unstructured":"Silver, D.N., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"key":"21_CR16","doi-asserted-by":"crossref","unstructured":"Torabi, F., Warnell, G., Stone, P.: Behavioral cloning from observation. arXiv preprint arXiv:1805.01954 (2018)","DOI":"10.24963\/ijcai.2018\/687"},{"key":"21_CR17","doi-asserted-by":"crossref","unstructured":"Van\u00a0Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double q-learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a030 (2016)","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"21_CR18","unstructured":"Wang, Z., Schaul, T., Hessel, M., Hasselt, H., Lanctot, M., Freitas, N.: Dueling network architectures for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1995\u20132003. PMLR (2016)"},{"key":"21_CR19","unstructured":"Zhu, S.: RLFC (2024). https:\/\/github.com\/ZhuShaoQiang\/RLFC"}],"container-title":["Lecture Notes in Computer Science","Web and Big Data"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-7244-5_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,27]],"date-time":"2024-08-27T17:08:39Z","timestamp":1724778519000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-7244-5_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819772438","9789819772445"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-7244-5_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"28 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The work was supported by the National Natural Science Foundation of China under (Grant No. 62072135) and the International Exchange Program of Harbin Engineering University for Innovation-oriented Talents Cultivation.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"APWeb-WAIM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asia-Pacific Web (APWeb) and Web-Age Information Management (WAIM) Joint International Conference on Web and Big Data","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Jinhua","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"apwebwaim2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/apweb2024.zjnu.edu.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}