{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T23:07:12Z","timestamp":1762038432683,"version":"build-2065373602"},"reference-count":50,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,1,10]],"date-time":"2021-01-10T00:00:00Z","timestamp":1610236800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,1,10]],"date-time":"2021-01-10T00:00:00Z","timestamp":1610236800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,1,10]]},"DOI":"10.1109\/icpr48806.2021.9411972","type":"proceedings-article","created":{"date-parts":[[2021,5,6]],"date-time":"2021-05-06T02:15:54Z","timestamp":1620267354000},"page":"8751-8758","source":"Crossref","is-referenced-by-count":7,"title":["Self-Supervised Joint Encoding of Motion and Appearance for First Person Action Recognition"],"prefix":"10.1109","author":[{"given":"Mirco","family":"Planamente","sequence":"first","affiliation":[{"name":"Italian Institute of Technology"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andrea","family":"Bottino","sequence":"additional","affiliation":[{"name":"Politecnico di Torino,Dept. of Control and Computer Eng.,Torino,Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Barbara","family":"Caputo","sequence":"additional","affiliation":[{"name":"Italian Institute of Technology"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45103-X_50"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.281"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01017"},{"key":"ref32","first-page":"122","article-title":"Multi-modal domain adaptation for fine-grained action recognition","author":"munro","year":"0","journal-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition"},{"journal-title":"Self-supervised Spatiotemporal Representation Learning for Videos by Predicting Motion and Appearance Statistics","year":"2019","author":"wang","key":"ref31"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-20873-8_7"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.441"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00807"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00151"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01249-6_24"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.638"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.320"},{"key":"ref29","article-title":"Optical-flow based self-supervised learning of obstacle appearance applied to MAV landing","volume":"abs 1509 1423","author":"ho","year":"2015","journal-title":"CoRR"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01019"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_44"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00718"},{"journal-title":"Self-supervised visual feature learning with deep neural networks A survey[J]","year":"2019","author":"jing","key":"ref22"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00118"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_9"},{"key":"ref23","article-title":"Unsupervised representation learning by predicting image rotations","author":"gidaris","year":"2018","journal-title":"ICLRE"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00975"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.278"},{"journal-title":"Seeing and hearing egocentric actions How much can we learn?","year":"2019","author":"cartas","key":"ref50"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2017.276"},{"journal-title":"Epic-fusion Audio-visual temporal binding for egocentric action recognition","year":"2019","author":"kazakos","key":"ref11"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995444"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01240-3_9"},{"journal-title":"Knowing what where and when to look Efficient video action modeling with attention","year":"2020","author":"perez-rua","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2901707"},{"key":"ref15","article-title":"Baidu-uts submission to the epic-kitchens action recognition challenge 2019","volume":"abs 1906 9383","author":"wang","year":"2019","journal-title":"CoRR"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00037"},{"key":"ref17","first-page":"0","article-title":"Multi-task learning to improve egocentric action recognition","author":"kapidis","year":"0","journal-title":"Proceedings of the IEEE International Conference on Computer Vision Workshops"},{"key":"ref18","article-title":"Multi-modal domain adaptation for fine-grained action recognition","author":"munro","year":"2020","journal-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00543"},{"key":"ref4","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref3","article-title":"Attention is all we need: Nailing down object-centric attention for egocentric activity recognition","author":"sudhakaran","year":"0","journal-title":"British Machine Vision Conference"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248010"},{"key":"ref5","article-title":"Online action detection","author":"geest","year":"2016","journal-title":"Proc ECCV"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.214"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"journal-title":"Fbk-hupba submission to the epic-kitchens 2019 action recognition challenge","year":"2019","author":"sudhakaran","key":"ref49"},{"key":"ref9","article-title":"In the eye of beholder: Joint learning of gaze and actions in first person video","author":"li","year":"2018","journal-title":"the European Conference on Computer Vision (ECCV)"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.487"},{"key":"ref45","doi-asserted-by":"crossref","first-page":"20","DOI":"10.1007\/978-3-319-46484-8_2","article-title":"Temporal segment networks: Towards good practices for deep action recognition","author":"wang","year":"2016","journal-title":"Computer Vision - ECCV 2016"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01231"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.209"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2868668"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00050"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00464"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2992889"}],"event":{"name":"2020 25th International Conference on Pattern Recognition (ICPR)","start":{"date-parts":[[2021,1,10]]},"location":"Milan, Italy","end":{"date-parts":[[2021,1,15]]}},"container-title":["2020 25th International Conference on Pattern Recognition (ICPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9411940\/9411911\/09411972.pdf?arnumber=9411972","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,3]],"date-time":"2022-08-03T00:09:23Z","timestamp":1659485363000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9411972\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,10]]},"references-count":50,"URL":"https:\/\/doi.org\/10.1109\/icpr48806.2021.9411972","relation":{},"subject":[],"published":{"date-parts":[[2021,1,10]]}}}