{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T15:57:40Z","timestamp":1759334260908,"version":"build-2065373602"},"reference-count":45,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,7,26]],"date-time":"2025-07-26T00:00:00Z","timestamp":1753488000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,26]],"date-time":"2025-07-26T00:00:00Z","timestamp":1753488000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,7,26]]},"DOI":"10.23919\/mva65244.2025.11175123","type":"proceedings-article","created":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T17:35:13Z","timestamp":1758908113000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["MoExDA: Domain Adaptation for Edge-based Action Recognition"],"prefix":"10.23919","author":[{"given":"Takuya","family":"Sugimoto","sequence":"first","affiliation":[{"name":"Nagoya Institute of Technology,Japan"}]},{"given":"Ning","family":"Ding","sequence":"additional","affiliation":[{"name":"Nagoya Institute of Technology,Japan"}]},{"given":"Toru","family":"Tamaki","sequence":"additional","affiliation":[{"name":"Nagoya Institute of Technology,Japan"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01594-9"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01594-9"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3115476"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00685"},{"article-title":"Enabling detailed action recognition evaluation through video dataset augmentation","volume-title":"Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track","author":"Chung","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01231-1_32"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-96-2071-5_27"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-49409-8_2"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298599"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.339"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19772-7_10"},{"article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","volume-title":"International Conference on Learning Representations","author":"Dosovitskiy","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01220"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref15","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR42600.2020.00028","article-title":"X3d: Expanding architectures for efficient video recognition","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Feichtenhofer"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/iccv.2019.00630"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1587\/transfun.2020IMP0012"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"ref19","first-page":"813","article-title":"Is space-time attention all you need for video understanding?","volume-title":"Proceedings of the 38th International Conference on Machine Learning","volume":"139","author":"Bertasius"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3243465"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"ref22","first-page":"6787","article-title":"VideoCLIP: Contrastive pre-training for zero-shot video-text understanding","volume-title":"Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing","author":"Xu"},{"key":"ref23","article-title":"Actionclip: A new paradigm for video action recognition","volume-title":"CoRR","volume":"abs\/2109.08472","author":"Wang","year":"2021"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00633"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.5220\/0012310400003660"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01823"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01163"},{"key":"ref28","article-title":"Why can\u2019t I dance in the mall? learning to mitigate scene bias in action recognition","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Choi","year":"2019"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73113-6_25"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-25069-9_36"},{"key":"ref31","article-title":"UCF101: A dataset of 101 human actions classes from videos in the wild","volume-title":"CoRR","volume":"abs\/1212.0402","author":"Soomro","year":"2012"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/s00138-016-0746-x"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/11758501_76"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-89639-5_40"},{"key":"ref35","first-page":"448","article-title":"Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift","volume-title":"Proceedings of the 32nd International Conference on Machine Learning","author":"Ioffe"},{"article-title":"Instance normalization: The missing ingredient for fast stylization","year":"2017","author":"Ulyanov","key":"ref36"},{"key":"ref37","first-page":"3","article-title":"Group Normalization","volume-title":"Computer Vision \u2013 ECCV 2018","author":"Wu"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.167"},{"key":"ref39","article-title":"Positional normalization","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Li","year":"2019"},{"article-title":"mixup: Beyond empirical risk minimization","volume-title":"6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings","author":"Zhang","key":"ref40"},{"volume-title":"Videomix: Rethinking data augmentation for video classification","year":"2020","author":"Yun","key":"ref41"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/3551626.3564941"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref44","article-title":"The kinetics human action video dataset","volume-title":"CoRR","volume":"abs\/1705.06950","author":"Kay","year":"2017"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01446-y"}],"event":{"name":"2025 19th International Conference on Machine Vision and Applications (MVA)","start":{"date-parts":[[2025,7,26]]},"location":"Kyoto, Japan","end":{"date-parts":[[2025,7,28]]}},"container-title":["2025 19th International Conference on Machine Vision and Applications (MVA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11174131\/11175049\/11175123.pdf?arnumber=11175123","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T13:34:22Z","timestamp":1759239262000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11175123\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,26]]},"references-count":45,"URL":"https:\/\/doi.org\/10.23919\/mva65244.2025.11175123","relation":{},"subject":[],"published":{"date-parts":[[2025,7,26]]}}}