{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T10:46:40Z","timestamp":1766054800806,"version":"3.48.0"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iros60139.2025.11246844","type":"proceedings-article","created":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:54:45Z","timestamp":1764269685000},"page":"880-886","source":"Crossref","is-referenced-by-count":0,"title":["Anomaly Detection in Human-Robot Interaction Using Multimodal Models Constructed from In-the-Wild Interactions"],"prefix":"10.1109","author":[{"given":"Shota","family":"Mochizuki","sequence":"first","affiliation":[{"name":"Nagoya University,Graduate School of Informatics,Japan"}]},{"given":"Sanae","family":"Yamashita","sequence":"additional","affiliation":[{"name":"Nagoya University,Graduate School of Informatics,Japan"}]},{"given":"Kenya","family":"Hoshimure","sequence":"additional","affiliation":[{"name":"CyberAgent, Inc.,Japan"}]},{"given":"Jun","family":"Baba","sequence":"additional","affiliation":[{"name":"CyberAgent, Inc.,Japan"}]},{"given":"Tomonori","family":"Kubota","sequence":"additional","affiliation":[{"name":"Nagoya University,Graduate School of Engineering,Japan"}]},{"given":"Kohei","family":"Ogawa","sequence":"additional","affiliation":[{"name":"Nagoya University,Graduate School of Engineering,Japan"}]},{"given":"Ryuichiro","family":"Higashinaka","sequence":"additional","affiliation":[{"name":"Nagoya University,Graduate School of Informatics,Japan"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-13-9443-0_6"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3472307.3484183"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.21236\/ada459168"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.sigdial-1.10"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCA.2011.2164243"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2021.1928549"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3623809.3623863"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-97-3752-9_3"},{"key":"ref9","first-page":"3146","article-title":"The dialogue breakdown detection challenge: Task description, datasets, and evaluation metrics","volume-title":"Proc. LREC","author":"Higashinaka"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-9323-9_39"},{"key":"ref11","first-page":"4171","article-title":"BERT: Pretraining of deep bidirectional transformers for language understanding","volume-title":"Proc. NAACL-HLT","author":"Devlin"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.sigdial-1.12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/GCCE56475.2022.10014052"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/RO-MAN60168.2024.10731184"},{"article-title":"YOLOv10: Real-time end-to-end object detection","year":"2024","author":"Wang","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.3390\/sym11091066"},{"key":"ref17","first-page":"10078","article-title":"VideoMAE: Masked autoencoders are data-efficient learners for self-supervised video pre-training","volume-title":"Proc. NeurIPS","volume":"35","author":"Tong"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952132"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413869"},{"key":"ref20","first-page":"973","article-title":"The foundations of cost-sensitive learning","volume-title":"Proc. IJCAI","volume":"2","author":"Elkan"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.580"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00535"},{"issue":"11","key":"ref23","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"Van der Maaten","year":"2008","journal-title":"Journal of Machine Learning Research"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1148\/radiol.2263011500"},{"article-title":"Applying conditional random fields to Japanese morphological analysis","volume-title":"Proc. EMNLP","author":"Kudo","key":"ref25"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3623809.3623889"}],"event":{"name":"2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2025,10,19]]},"location":"Hangzhou, China","end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11245651\/11245652\/11246844.pdf?arnumber=11246844","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T10:44:07Z","timestamp":1766054647000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11246844\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/iros60139.2025.11246844","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}