{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T16:29:24Z","timestamp":1755793764721,"version":"3.41.0"},"publisher-location":"Cham","reference-count":44,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031923869","type":"print"},{"value":"9783031923876","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-92387-6_23","type":"book-chapter","created":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T18:43:08Z","timestamp":1748198588000},"page":"321-338","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["BehAVE: Behaviour Alignment of\u00a0Video Game Encodings"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-3487-1339","authenticated-orcid":false,"given":"Nemanja","family":"Ra\u0161ajski","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2749-2618","authenticated-orcid":false,"given":"Chintan","family":"Trivedi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0889-2766","authenticated-orcid":false,"given":"Konstantinos","family":"Makantasis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5554-1961","authenticated-orcid":false,"given":"Antonios","family":"Liapis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7793-1450","authenticated-orcid":false,"given":"Georgios N.","family":"Yannakakis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"23_CR1","doi-asserted-by":"crossref","unstructured":"Apostolidis, E., Balaouras, G., Mezaris, V., Patras, I.: Combining global and local attention with positional encoding for video summarization. In: 2021 IEEE International Symposium on Multimedia (ISM), pp. 226\u2013234. IEEE (2021)","DOI":"10.1109\/ISM52913.2021.00045"},{"key":"23_CR2","unstructured":"Arjovsky, M., Bottou, L., Gulrajani, I., Lopez-Paz, D.: Invariant risk minimization. arXiv preprint arXiv:1907.02893 (2019)"},{"key":"23_CR3","unstructured":"Baker, B., et al.: Video pretraining (VPT): learning to act by watching unlabeled online videos. In: Advances in Neural Information Processing Systems, vol. 35, pp. 24639\u201324654 (2022)"},{"key":"23_CR4","unstructured":"Bertasius, G., Wang, H., Torresani, L.: Is space-time attention all you need for video understanding? In: ICML, vol.\u00a02, p.\u00a04 (2021)"},{"key":"23_CR5","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? a new model and the kinetics dataset. In: proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"23_CR6","unstructured":"Dosovitskiy, A., Ros, G., Codevilla, F., Lopez, A., Koltun, V.: Carla: an open urban driving simulator. In: Conference on Robot Learning, pp. 1\u201316. PMLR (2017)"},{"key":"23_CR7","doi-asserted-by":"crossref","unstructured":"Gu, X., Liu, G., Zhang, X., Tang, L., Zhou, X., Qiu, W.: Infrared-visible synthetic data from game engine for image fusion improvement. IEEE Trans. Games (2023)","DOI":"10.1109\/TG.2023.3263001"},{"key":"23_CR8","unstructured":"Kim, Y.W., et al.: How transferable are video representations based on synthetic data? In: Advances in Neural Information Processing Systems, vol. 35, pp. 35710\u201335723 (2022)"},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"Ko, D., et al.: Video-text representation learning via differentiable weak temporal alignment. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5016\u20135025 (2022)","DOI":"10.1109\/CVPR52688.2022.00496"},{"key":"23_CR10","doi-asserted-by":"crossref","unstructured":"Kr\u00e4henb\u00fchl, P.: Free supervision from video games. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2955\u20132964 (2018)","DOI":"10.1109\/CVPR.2018.00312"},{"key":"23_CR11","unstructured":"Leiprecht, S.: Using simulations and domain randomization for autonomous driving. Tech. Rep. Comput. Sci. (July 2020), 1\u20134 (2020)"},{"issue":"1","key":"23_CR12","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1145\/502269.502288","volume":"45","author":"M Lewis","year":"2002","unstructured":"Lewis, M., Jacobson, J.: Game engines. Commun. ACM 45(1), 27 (2002)","journal-title":"Commun. ACM"},{"key":"23_CR13","doi-asserted-by":"crossref","unstructured":"Mishra, S., et al.: Task2sim: towards effective pre-training and transfer from synthetic data. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9194\u20139204 (2022)","DOI":"10.1109\/CVPR52688.2022.00898"},{"key":"23_CR14","unstructured":"Mnih, V., et al.: Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)"},{"key":"23_CR15","doi-asserted-by":"crossref","unstructured":"Patil, R., Boit, S., Gudivada, V., Nandigam, J.: A survey of text representation and embedding techniques in NLP. IEEE Access (2023)","DOI":"10.1109\/ACCESS.2023.3266377"},{"key":"23_CR16","doi-asserted-by":"crossref","unstructured":"Pearce, T., Zhu, J.: Counter-strike deathmatch with large-scale behavioural cloning. In: 2022 IEEE Conference on Games (CoG), pp. 104\u2013111. IEEE (2022)","DOI":"10.1109\/CoG51982.2022.9893617"},{"key":"23_CR17","volume-title":"Introduction To Game Development (Game Development)","author":"S Rabin","year":"2005","unstructured":"Rabin, S.: Introduction To Game Development (Game Development). Inc, Charles River Media (2005)"},{"key":"23_CR18","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"issue":"8","key":"23_CR19","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I., et al.: Language models are unsupervised multitask learners. OpenAI Blog 1(8), 9 (2019)","journal-title":"OpenAI Blog"},{"key":"23_CR20","unstructured":"Reed, S., et\u00a0al.: A generalist agent. arXiv preprint arXiv:2205.06175 (2022)"},{"key":"23_CR21","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-BERT: sentence embeddings using siamese BERT-networks. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics (2019). https:\/\/arxiv.org\/abs\/1908.10084","DOI":"10.18653\/v1\/D19-1410"},{"key":"23_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"102","DOI":"10.1007\/978-3-319-46475-6_7","volume-title":"Computer Vision \u2013 ECCV 2016","author":"SR Richter","year":"2016","unstructured":"Richter, S.R., Vineet, V., Roth, S., Koltun, V.: Playing for data: ground truth from computer games. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9906, pp. 102\u2013118. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46475-6_7"},{"issue":"8","key":"23_CR23","doi-asserted-by":"publisher","first-page":"428","DOI":"10.1038\/s42256-020-0208-z","volume":"2","author":"S Risi","year":"2020","unstructured":"Risi, S., Togelius, J.: Increasing generality in machine learning through procedural content generation. Nat. Mach. Intell. 2(8), 428\u2013436 (2020)","journal-title":"Nat. Mach. Intell."},{"key":"23_CR24","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1016\/0377-0427(87)90125-7","volume":"20","author":"P Rousseeuw","year":"1987","unstructured":"Rousseeuw, P.: Silhouettes: a graphical aid to the interpretation and validation of cluster analysis. J. Comput. Appl. Math. 20, 53\u201365 (1987)","journal-title":"J. Comput. Appl. Math."},{"key":"23_CR25","unstructured":"Simmons-Edler, R., Badman, R., Longpre, S., Rajan, K.: AI-powered autonomous weapons risk geopolitical instability and threaten AI research. arXiv preprint arXiv:2405.01859 (2024)"},{"key":"23_CR26","unstructured":"Song, Y.C., et al.: Unsupervised alignment of actions in video with text descriptions. In: IJCAI, pp. 2025\u20132031 (2016)"},{"key":"23_CR27","doi-asserted-by":"crossref","unstructured":"Taesiri, M.R., Macklon, F., Bezemer, C.P.: Clip meets gamephysics: towards bug identification in gameplay videos using zero-shot transfer learning. In: Proceedings of the 19th International Conference on Mining Software Repositories., pp. 270\u2013281 (2022)","DOI":"10.1145\/3524842.3528438"},{"key":"23_CR28","doi-asserted-by":"crossref","unstructured":"Tobin, J., Fong, R., Ray, A., Schneider, J., Zaremba, W., Abbeel, P.: Domain randomization for transferring deep neural networks from simulation to the real world. In: 2017 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 23\u201330. IEEE (2017)","DOI":"10.1109\/IROS.2017.8202133"},{"issue":"1","key":"23_CR29","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1109\/JPROC.2024.3364137","volume":"112","author":"J Togelius","year":"2024","unstructured":"Togelius, J., Yannakakis, G.N.: Choose your weapon: survival strategies for depressed AI academics [point of view]. Proc. IEEE 112(1), 4\u201311 (2024)","journal-title":"Proc. IEEE"},{"key":"23_CR30","unstructured":"Tong, Z., Song, Y., Wang, J., Wang, L.: Videomae: masked autoencoders are data-efficient learners for self-supervised video pre-training. In: Advances in Neural Information Processing Systems, vol. 35, pp. 10078\u201310093 (2022)"},{"key":"23_CR31","doi-asserted-by":"crossref","unstructured":"Trivedi, C., Liapis, A., Yannakakis, G.N.: Contrastive learning of generalized game representations. In: 2021 IEEE Conference on Games (CoG), pp.\u00a01\u20138. IEEE (2021)","DOI":"10.1109\/CoG52621.2021.9619107"},{"key":"23_CR32","unstructured":"Trivedi, C., Makantasis, K., Liapis, A., Yannakakis, G.N.: Towards general game representations: decomposing games pixels into content and style. arXiv preprint arXiv:2307.11141 (2023)"},{"issue":"8","key":"23_CR33","first-page":"8052","volume":"35","author":"J Wang","year":"2022","unstructured":"Wang, J., et al.: Generalizing to unseen domains: a survey on domain generalization. IEEE Trans. Knowl. Data Eng. 35(8), 8052\u20138072 (2022)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"23_CR34","doi-asserted-by":"crossref","unstructured":"Wang, L., et al.: Videomae v2: scaling video masked autoencoders with dual masking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 14549\u201314560 (2023)","DOI":"10.1109\/CVPR52729.2023.01398"},{"key":"23_CR35","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1016\/j.neucom.2018.05.083","volume":"312","author":"M Wang","year":"2018","unstructured":"Wang, M., Deng, W.: Deep visual domain adaptation: a survey. Neurocomputing 312, 135\u2013153 (2018)","journal-title":"Neurocomputing"},{"key":"23_CR36","doi-asserted-by":"crossref","unstructured":"Wang, R., et al.: Masked video distillation: rethinking masked feature modeling for self-supervised video representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6312\u20136322 (2023)","DOI":"10.1109\/CVPR52729.2023.00611"},{"key":"23_CR37","unstructured":"Wenzel, F., et al.: Assaying out-of-distribution generalization in transfer learning. In: Advances in Neural Information Processing Systems, vol. 35, pp. 7181\u20137198 (2022)"},{"key":"23_CR38","doi-asserted-by":"crossref","unstructured":"Wu, C.Y., Krahenbuhl, P.: Towards long-form video understanding. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1884\u20131894 (2021)","DOI":"10.1109\/CVPR46437.2021.00192"},{"issue":"3","key":"23_CR39","doi-asserted-by":"publisher","first-page":"248","DOI":"10.1109\/TG.2018.2877047","volume":"11","author":"M Wydmuch","year":"2018","unstructured":"Wydmuch, M., Kempka, M., Ja\u015bkowski, W.: Vizdoom competitions: playing doom from pixels. IEEE Trans. Games 11(3), 248\u2013259 (2018)","journal-title":"IEEE Trans. Games"},{"key":"23_CR40","doi-asserted-by":"crossref","unstructured":"Xu, H., et al.: Videoclip: contrastive pre-training for zero-shot video-text understanding. arXiv preprint arXiv:2109.14084 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.544"},{"key":"23_CR41","doi-asserted-by":"crossref","unstructured":"Xu, H., Gao, Y., Yu, F., Darrell, T.: End-to-end learning of driving models from large-scale video datasets. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2174\u20132182 (2017)","DOI":"10.1109\/CVPR.2017.376"},{"key":"23_CR42","doi-asserted-by":"crossref","unstructured":"Yue, X., Zhang, Y., Zhao, S., Sangiovanni-Vincentelli, A., Keutzer, K., Gong, B.: Domain randomization and pyramid consistency: Simulation-to-real generalization without accessing target domain data. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2100\u20132110 (2019)","DOI":"10.1109\/ICCV.2019.00219"},{"key":"23_CR43","unstructured":"Zhu, Y., et al.: A comprehensive study of deep video action recognition. arXiv preprint arXiv:2012.06567 (2020)"},{"key":"23_CR44","doi-asserted-by":"crossref","unstructured":"Zhu, Z., Hou, J., Wu, D.O.: Cross-modal orthogonal high-rank augmentation for RGB-event transformer-trackers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 22045\u201322055 (2023)","DOI":"10.1109\/ICCV51070.2023.02015"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-92387-6_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T18:43:20Z","timestamp":1748198600000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-92387-6_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031923869","9783031923876"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-92387-6_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}