{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,9]],"date-time":"2025-04-09T04:21:25Z","timestamp":1744172485981,"version":"3.40.3"},"publisher-location":"Cham","reference-count":60,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031732089"},{"type":"electronic","value":"9783031732096"}],"license":[{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73209-6_3","type":"book-chapter","created":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T15:02:57Z","timestamp":1730386977000},"page":"36-54","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Are Synthetic Data Useful for\u00a0Egocentric Hand-Object Interaction Detection?"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-8693-3826","authenticated-orcid":false,"given":"Rosario","family":"Leonardi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6911-0302","authenticated-orcid":false,"given":"Antonino","family":"Furnari","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6368-1910","authenticated-orcid":false,"given":"Francesco","family":"Ragusa","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6034-0432","authenticated-orcid":false,"given":"Giovanni Maria","family":"Farinella","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,1]]},"reference":[{"key":"3_CR1","doi-asserted-by":"crossref","unstructured":"Besari, A.R.A., Saputra, A.A., Chin, W.H., Kubota, N., et\u00a0al.: Hand\u2013object interaction recognition based on visual attention using multiscopic cyber-physical-social system. Int. J. Adv. Intell. Inform. 9(2) (2023)","DOI":"10.26555\/ijain.v9i2.901"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Bousmalis, K., Silberman, N., Dohan, D., Erhan, D., Krishnan, D.: Unsupervised pixel-level domain adaptation with generative adversarial networks. In: CVPR, pp. 3722\u20133731 (2017)","DOI":"10.1109\/CVPR.2017.18"},{"key":"3_CR3","doi-asserted-by":"crossref","unstructured":"Cai, Q., Pan, Y., Ngo, C.W., Tian, X., Duan, L., Yao, T.: Exploring object relation in mean teacher for cross-domain detection. In: CVPR, pp. 11457\u201311466 (2019)","DOI":"10.1109\/CVPR.2019.01172"},{"key":"3_CR4","doi-asserted-by":"publisher","first-page":"714023","DOI":"10.3389\/frobt.2021.714023","volume":"8","author":"A Carf\u00ec","year":"2021","unstructured":"Carf\u00ec, A., et al.: Hand-object interaction: from human demonstrations to robot manipulation. Front. Robot. AI 8, 714023 (2021)","journal-title":"Front. Robot. AI"},{"key":"3_CR5","unstructured":"Cheng, T., Shan, D., Hassen, A.S., Higgins, R.E.L., Fouhey, D.: Towards a richer 2D understanding of hands at scale. In: Thirty-Seventh Conference on Neural Information Processing Systems (2023)"},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Choudhary, A., Mishra, D., Karmakar, A.: Domain adaptive egocentric person re-identification. In: Computer Vision and Image Processing (CVIP), pp. 81\u201392 (2021)","DOI":"10.1007\/978-981-16-1103-2_8"},{"key":"3_CR7","unstructured":"Csurka, G.: Domain adaptation for visual applications: a comprehensive survey (2017). https:\/\/arxiv.org\/abs\/1702.05374"},{"key":"3_CR8","doi-asserted-by":"crossref","unstructured":"Damen, D., et al.: Rescaling egocentric vision: collection, pipeline and challenges for epic-kitchens-100. IJCV, 1\u201323 (2021)","DOI":"10.1007\/s11263-021-01531-2"},{"key":"3_CR9","doi-asserted-by":"crossref","unstructured":"Damen, D., et\u00a0al.: Scaling egocentric vision: the epic-kitchens dataset. In: ECCV, pp. 720\u2013736 (2018)","DOI":"10.1007\/978-3-030-01225-0_44"},{"key":"3_CR10","unstructured":"Darkhalil, A., et al.: Epic-kitchens visor benchmark: video segmentations and object relations. In: NeurIPS, pp. 13745\u201313758 (2022)"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"Deng, J., Li, W., Chen, Y., Duan, L.: Unbiased mean teacher for cross-domain object detection. In: CVPR, pp. 4091\u20134101 (2021)","DOI":"10.1109\/CVPR46437.2021.00408"},{"key":"3_CR12","doi-asserted-by":"publisher","first-page":"23241","DOI":"10.1007\/s11042-020-09597-9","volume":"80","author":"M Di Benedetto","year":"2021","unstructured":"Di Benedetto, M., Carrara, F., Meloni, E., Amato, G., Falchi, F., Gennaro, C.: Learning accurate personal protective equipment detection from virtual worlds. Multimedia Tools Appl. 80, 23241\u201323253 (2021)","journal-title":"Multimedia Tools Appl."},{"key":"3_CR13","unstructured":"Dosovitskiy, A., Ros, G., Codevilla, F., Lopez, A., Koltun, V.: CARLA: an open urban driving simulator. In: Proceedings of the 1st Annual Conference on Robot Learning, pp. 1\u201316 (2017)"},{"key":"3_CR14","doi-asserted-by":"crossref","unstructured":"Edsinger, A., Kemp, C.C.: Human-robot interaction for cooperative manipulation: handing objects to one another. In: RO-MAN 2007-The 16th IEEE International Symposium on Robot and Human Interactive Communication, pp. 1167\u20131172. IEEE (2007)","DOI":"10.1109\/ROMAN.2007.4415256"},{"key":"3_CR15","doi-asserted-by":"crossref","unstructured":"Fabbri, M., et al.: Motsynth: how can synthetic data help pedestrian detection and tracking? In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01067"},{"key":"3_CR16","doi-asserted-by":"crossref","unstructured":"Fu, Q., Liu, X., Kitani, K.M.: Sequential voting with relational box fields for active object detection. In: CVPR, pp. 2374\u20132383 (2022)","DOI":"10.1109\/CVPR52688.2022.00241"},{"key":"3_CR17","unstructured":"Ganin, Y., Lempitsky, V.: Unsupervised domain adaptation by backpropagation. In: International Conference on Machine Learning, pp. 1180\u20131189. PMLR (2015)"},{"key":"3_CR18","unstructured":"Grauman, K., et al.: Ego4d: around the world in 3,000 hours of egocentric video. In: CVPR, pp. 18995\u201319012 (2021)"},{"key":"3_CR19","doi-asserted-by":"crossref","unstructured":"Hasson, Y., et al.: Learning joint reconstruction of hands and manipulated objects. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01208"},{"key":"3_CR20","doi-asserted-by":"crossref","unstructured":"Jian, J., Liu, X., Li, M., Hu, R., Liu, J.: Affordpose: a large-scale dataset of hand-object interactions with affordance-driven hand pose. In: ICCV, pp. 14713\u201314724 (2023)","DOI":"10.1109\/ICCV51070.2023.01352"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Kirillov, A., Wu, Y., He, K., Girshick, R.: Pointrend: image segmentation as rendering. In: CVPR, pp. 9799\u20139808 (2020)","DOI":"10.1109\/CVPR42600.2020.00982"},{"key":"3_CR22","unstructured":"Kolve, E., et al.: Ai2-thor: an interactive 3d environment for visual AI (2017). https:\/\/arxiv.org\/abs\/1712.05474"},{"key":"3_CR23","unstructured":"Kolve, E., et al.: AI2-THOR: an interactive 3D environment for visual AI. arXiv (2017)"},{"key":"3_CR24","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1007\/978-3-031-06430-2_20","volume-title":"ICIAP 2022","author":"R Leonardi","year":"2022","unstructured":"Leonardi, R., Ragusa, F., Furnari, A., Farinella, G.M.: Egocentric human-object interaction detection exploiting synthetic data. In: Sclaroff, S., Distante, C., Leo, M., Farinella, G.M., Tombari, F. (eds.) ICIAP 2022. LNCS, vol. 13232, pp. 237\u2013248. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-06430-2_20"},{"key":"3_CR25","unstructured":"Li, C., et al.: igibson 2.0: object-centric simulation for robot learning of everyday household tasks. In: Faust, A., Hsu, D., Neumann, G. (eds.) Proceedings of the 5th Conference on Robot Learning. Proceedings of Machine Learning Research, vol.\u00a0164, pp. 455\u2013465. PMLR (2022). https:\/\/proceedings.mlr.press\/v164\/li22b.html"},{"key":"3_CR26","doi-asserted-by":"crossref","unstructured":"Li, Y., Nagarajan, T., Xiong, B., Grauman, K.: Ego-exo: transferring visual representations from third-person to first-person videos. In: CVPR, pp. 6943\u20136953 (2021)","DOI":"10.1109\/CVPR46437.2021.00687"},{"key":"3_CR27","unstructured":"Li, Y.J., et al.: Cross-domain adaptive teacher for object detection. In: CVPR, pp. 7581\u20137590 (2022)"},{"key":"3_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"3_CR29","doi-asserted-by":"crossref","unstructured":"Liu, S., Tripathi, S., Majumdar, S., Wang, X.: Joint hand motion and interaction hotspots prediction from egocentric videos. In: CVPR, pp. 3282\u20133292 (2022)","DOI":"10.1109\/CVPR52688.2022.00328"},{"key":"3_CR30","unstructured":"Liu, Y.C., et al.: Unbiased teacher for semi-supervised object detection. In: ICLR (2021)"},{"key":"3_CR31","unstructured":"Lu, Y., Mayol-Cuevas, W.W.: Egocentric hand-object interaction detection and application (2021). https:\/\/arxiv.org\/abs\/2109.14734"},{"issue":"22","key":"3_CR32","doi-asserted-by":"publisher","first-page":"11457","DOI":"10.3390\/app122211457","volume":"12","author":"Z Lv","year":"2022","unstructured":"Lv, Z., Poiesi, F., Dong, Q., Lloret, J., Song, H.: Deep learning for intelligent human-computer interaction. Appl. Sci. 12(22), 11457 (2022)","journal-title":"Appl. Sci."},{"key":"3_CR33","doi-asserted-by":"crossref","unstructured":"Savva, M., et al.: Habitat: A platform for embodied AI research. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00943"},{"key":"3_CR34","doi-asserted-by":"crossref","unstructured":"Munro, J., Damen, D.: Multi-modal domain adaptation for fine-grained action recognition. In: CVPR, pp. 122\u2013132 (2020)","DOI":"10.1109\/CVPR42600.2020.00020"},{"key":"3_CR35","unstructured":"Munro, J., Wray, M., Larlus, D., Csurka, G., Damen, D.: Domain adaptation in multi-view embedding for cross-modal video retrieval. ArXiv abs\/2110.12812 (2021). https:\/\/api.semanticscholar.org\/CorpusID:239768993"},{"key":"3_CR36","unstructured":"NVIDIA: Nvidia omniverse (2020). https:\/\/www.nvidia.com\/en-us\/omniverse\/synthetic-data\/"},{"key":"3_CR37","unstructured":"NVIDIA: Nvidia isaac sim (2021). https:\/\/developer.nvidia.com\/isaac-sim"},{"key":"3_CR38","unstructured":"Orlando, S., Furnari, A., Farinella, G.M.: Egocentric visitor localization and artwork detection in cultural sites using synthetic data. Pattern Recognition Letters - Special Issue on Pattern Recognition and Artificial Intelligence Techniques for Cultural Heritage (2020). https:\/\/iplab.dmi.unict.it\/SimulatedEgocentricNavigations\/"},{"key":"3_CR39","doi-asserted-by":"publisher","first-page":"104098","DOI":"10.1016\/j.imavis.2021.104098","volume":"107","author":"G Pasqualino","year":"2021","unstructured":"Pasqualino, G., Furnari, A., Signorello, G., Farinella, G.M.: An unsupervised domain adaptation scheme for single-stage artwork recognition in cultural sites. Image Vis. Comput. 107, 104098 (2021)","journal-title":"Image Vis. Comput."},{"key":"3_CR40","doi-asserted-by":"crossref","unstructured":"Plizzari, C., Perrett, T., Caputo, B., Damen, D.: What can a cook in Italy teach a mechanic in India? action recognition generalisation over scenarios and locations. In: ICCV2023 (2023)","DOI":"10.1109\/ICCV51070.2023.01256"},{"key":"3_CR41","doi-asserted-by":"crossref","unstructured":"Quattrocchi, C., Mauro, D.D., Furnari, A., Lopes, A., Moltisanti, M., Farinella, G.M.: Put your PPE on: a tool for synthetic data generation and related benchmark in construction site scenarios. In: International Conference on Computer Vision Theory and Applications, pp. 656\u2013663 (2023)","DOI":"10.5220\/0011718000003417"},{"key":"3_CR42","doi-asserted-by":"crossref","unstructured":"Ragusa, F., Furnari, A., Livatino, S., Farinella, G.M.: The meccano dataset: understanding human-object interactions from egocentric videos in an industrial-like domain. In: Winter Conference on Applications of Computer Vision, pp. 1569\u20131578 (2021)","DOI":"10.1109\/WACV48630.2021.00161"},{"key":"3_CR43","doi-asserted-by":"crossref","unstructured":"Ragusa, F., et al.: Enigma-51: towards a fine-grained understanding of human behavior in industrial scenarios. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 4549\u20134559 (2024)","DOI":"10.1109\/WACV57701.2024.00449"},{"key":"3_CR44","unstructured":"Ramakrishnan, S.K., et\u00a0al.: Habitat-matterport 3d dataset (hm3d): 1000 large-scale 3d environments for embodied AI. In: NeurIPS (2021)"},{"key":"3_CR45","doi-asserted-by":"crossref","unstructured":"Saito, K., Watanabe, K., Ushiku, Y., Harada, T.: Maximum classifier discrepancy for unsupervised domain adaptation. In: CVPR, pp. 3723\u20133732 (2018)","DOI":"10.1109\/CVPR.2018.00392"},{"key":"3_CR46","doi-asserted-by":"crossref","unstructured":"Savva, M., et\u00a0al.: Habitat: a platform for embodied AI research. In: ICCV, pp. 9339\u20139347 (2019)","DOI":"10.1109\/ICCV.2019.00943"},{"key":"3_CR47","doi-asserted-by":"crossref","unstructured":"Sener, F., et al.: Assembly101: a large-scale multi-view video dataset for understanding procedural activities. In: CVPR, pp. 21096\u201321106 (2022)","DOI":"10.1109\/CVPR52688.2022.02042"},{"key":"3_CR48","unstructured":"shadowrobot: Shadowhand (2005). https:\/\/www.shadowrobot.com\/dexterous-hand-series\/"},{"key":"3_CR49","doi-asserted-by":"crossref","unstructured":"Shan, D., Geng, J., Shu, M., Fouhey, D.F.: Understanding human hands in contact at internet scale. In: CVPR, pp. 9869\u20139878 (2020)","DOI":"10.1109\/CVPR42600.2020.00989"},{"key":"3_CR50","unstructured":"Szot, A., et\u00a0al.: Habitat 2.0: training home assistants to rearrange their habitat. In: Advances in Neural Information Processing Systems, vol. 34, pp. 251\u2013266 (2021)"},{"key":"3_CR51","doi-asserted-by":"crossref","unstructured":"Tang, Y., Tian, Y., Lu, J., Feng, J., Zhou, J.: Action recognition in RGB-D egocentric videos. In: 2017 IEEE International Conference on Image Processing (ICIP), pp. 3410\u20133414. IEEE (2017)","DOI":"10.1109\/ICIP.2017.8296915"},{"key":"3_CR52","unstructured":"Tarvainen, A., Valpola, H.: Mean teachers are better role models: weight-averaged consistency targets improve semi-supervised deep learning results. NeurIPS 30 (2017)"},{"key":"3_CR53","doi-asserted-by":"crossref","unstructured":"Tzeng, E., Hoffman, J., Saenko, K., Darrell, T.: Adversarial discriminative domain adaptation. In: CVPR, pp. 7167\u20137176 (2017)","DOI":"10.1109\/CVPR.2017.316"},{"key":"3_CR54","unstructured":"Unity: Synthetichumans package (unity computer vision) (2022). https:\/\/github.com\/Unity-Technologies\/com.unity.cv.synthetichumans"},{"key":"3_CR55","doi-asserted-by":"crossref","unstructured":"Wang, R., et al.: Dexgraspnet: a large-scale robotic dexterous grasp dataset for general objects based on simulation. In: CVPR, pp. 11359\u201311366 (2023)","DOI":"10.1109\/ICRA48891.2023.10160982"},{"key":"3_CR56","doi-asserted-by":"crossref","unstructured":"Xia, F., R.\u00a0Zamir, A., He, Z.Y., Sax, A., Malik, J., Savarese, S.: Gibson ENV: real-world perception for embodied agents. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00945"},{"issue":"2","key":"3_CR57","doi-asserted-by":"publisher","first-page":"713","DOI":"10.1109\/LRA.2020.2965078","volume":"5","author":"F Xia","year":"2020","unstructured":"Xia, F., et al.: Interactive Gibson benchmark: a benchmark for interactive navigation in cluttered environments. IEEE Robot. Autom. Lett. 5(2), 713\u2013720 (2020)","journal-title":"IEEE Robot. Autom. Lett."},{"key":"3_CR58","doi-asserted-by":"crossref","unstructured":"Ye, Y., et al.: Affordance diffusion: synthesizing hand-object interactions. In: CVPR, pp. 22479\u201322489 (2023)","DOI":"10.1109\/CVPR52729.2023.02153"},{"key":"3_CR59","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1007\/978-3-031-19818-2_8","volume-title":"ECCV 2022","author":"L Zhang","year":"2022","unstructured":"Zhang, L., Zhou, S., Stent, S., Shi, J.: Fine-grained egocentric hand-object segmentation: dataset, model, and applications. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13689, pp. 127\u2013145. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19818-2_8"},{"issue":"1","key":"3_CR60","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1109\/JPROC.2020.3004555","volume":"109","author":"F Zhuang","year":"2020","unstructured":"Zhuang, F., et al.: A comprehensive survey on transfer learning. Proc. IEEE 109(1), 43\u201376 (2020)","journal-title":"Proc. IEEE"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73209-6_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,8]],"date-time":"2025-04-08T12:25:30Z","timestamp":1744115130000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73209-6_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,1]]},"ISBN":["9783031732089","9783031732096"],"references-count":60,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73209-6_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,1]]},"assertion":[{"value":"1 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}