{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T17:00:26Z","timestamp":1767114026294,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":19,"publisher":"ACM","funder":[{"name":"Japan Science and Technology Agency","award":["JPMJBS2402"],"award-info":[{"award-number":["JPMJBS2402"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,12]]},"DOI":"10.1145\/3714394.3750589","type":"proceedings-article","created":{"date-parts":[[2025,12,29]],"date-time":"2025-12-29T21:13:49Z","timestamp":1767042829000},"page":"617-620","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["What Does AI See through Animals' Eyes? Zero-Shot Activity Recognition from Bio-loggers"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8184-3883","authenticated-orcid":false,"suffix":"Mr.","given":"Haruki","family":"Yonekura","sequence":"first","affiliation":[{"name":"The University of Osaka, Suita, Osaka, Japan and RIKEN Center for Computational Science, Kobe, Hyogo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8653-3523","authenticated-orcid":false,"given":"Kei","family":"Tanigaki","sequence":"additional","affiliation":[{"name":"The University of Osaka, Suita, Osaka, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-1997-6891","authenticated-orcid":false,"given":"Tsuneari","family":"Kuroiwa","sequence":"additional","affiliation":[{"name":"The University of Osaka, Suita, Osaka, Japan"}]}],"member":"320","published-online":{"date-parts":[[2025,12,29]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","unstructured":"Moloud Abdar et al. 2024. A Review of Deep Learning for Video Captioning. IEEE Transactions on Pattern Analysis and Machine Intelligence (2024) 1-20. https:\/\/doi.org\/10.1109\/TPAMI.2024.3522295","DOI":"10.1109\/TPAMI.2024.3522295"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1186\/2050-3385-1-20"},{"key":"e_1_3_2_1_3_1","volume-title":"Advances in Neural Information Processing Systems","volume":"31","author":"Xuguang","year":"2018","unstructured":"Xuguang Duan et al., 2018. Weakly supervised dense event captioning in videos. Advances in Neural Information Processing Systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Gaspard Dussert et al. 2024. Zero-shot animal behaviour classification with vision-language foundation models. Methods in Ecology and Evolution (2024).","DOI":"10.1111\/2041-210X.70059\/v2\/response1"},{"key":"e_1_3_2_1_5_1","unstructured":"Zalan Fabian et al. 2023. Multimodal Foundation Models for Zero-shot Animal Species Recognition in Camera Trap Images. CoRR (2023)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0254454"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1186\/s40462-024-00511-8"},{"key":"e_1_3_2_1_8_1","first-page":"1","article-title":"Exploring video captioning techniques: A comprehensive survey on deep learning methods","volume":"2","author":"Saiful Islam","year":"2021","unstructured":"Saiful Islam et al., 2021. Exploring video captioning techniques: A comprehensive survey on deep learning methods. SN Computer Science, Vol. 2, 2 (2021), 1-28.","journal-title":"SN Computer Science"},{"volume-title":"Proceedings of the IEEE international conference on computer vision. 706-715","author":"Ranjay","key":"e_1_3_2_1_9_1","unstructured":"Ranjay Krishna et al., 2017. Dense-captioning events in videos. In Proceedings of the IEEE international conference on computer vision. 706-715."},{"key":"e_1_3_2_1_10_1","first-page":"413","article-title":"Isolation forest. In 2008 eighth ieee international conference on data mining","author":"Liu Fei Tony","year":"2008","unstructured":"Fei Tony Liu et al., 2008. Isolation forest. In 2008 eighth ieee international conference on data mining. IEEE, 413-422.","journal-title":"IEEE"},{"volume-title":"International conference on machine learning. PmLR, 8748-8763","author":"Alec","key":"e_1_3_2_1_11_1","unstructured":"Alec Radford et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748-8763."},{"volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 14313-14323","author":"Shuhuai","key":"e_1_3_2_1_12_1","unstructured":"Shuhuai Ren et al., 2024. Timechat: A time-sensitive multimodal large language model for long video understanding. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 14313-14323."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.1146788"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Kei Tanigaki et al. 2024. Automatic recording of rare behaviors of wild animals using video bio-loggers with on-board light-weight outlier detector. PNAS nexus Vol. 3 1 (2024) pgad447.","DOI":"10.1093\/pnasnexus\/pgad447"},{"key":"e_1_3_2_1_15_1","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research, Vol. 9, 11 (2008).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0128789"},{"volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 10714-10726","author":"Antoine","key":"e_1_3_2_1_17_1","unstructured":"Antoine Yang et al., 2023. Vid2seq: Large-scale pretraining of a visual language model for dense video captioning. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 10714-10726."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Hang Yuan et al. 2024. Self-supervised learning for human activity recognition using 700 000 person-days of wearable data. NPJ digital medicine Vol. 7 1 (2024) 91.","DOI":"10.1038\/s41746-024-01062-3"},{"volume-title":"Proceedings of the 29th International Conference on Computational Linguistics. 5651-5665","author":"Wanrong","key":"e_1_3_2_1_19_1","unstructured":"Wanrong Zhu et al., 2022. End-to-end Dense Video Captioning as Sequence Generation. In Proceedings of the 29th International Conference on Computational Linguistics. 5651-5665."}],"event":{"name":"UbiComp '25:The 2025 ACM International Joint Conference on Pervasive and Ubiquitous Computing \/ ISWC ACM International Symposium on Wearable Computers","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing","SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGSPATIAL ACM Special Interest Group on Spatial Information"],"location":"Espoo Finland"},"container-title":["Companion of the 2025 ACM International Joint Conference on Pervasive and Ubiquitous Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3714394.3750589","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T16:57:00Z","timestamp":1767113820000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3714394.3750589"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,12]]},"references-count":19,"alternative-id":["10.1145\/3714394.3750589","10.1145\/3714394"],"URL":"https:\/\/doi.org\/10.1145\/3714394.3750589","relation":{},"subject":[],"published":{"date-parts":[[2025,10,12]]},"assertion":[{"value":"2025-12-29","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}