{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T21:17:58Z","timestamp":1757452678700,"version":"3.41.0"},"publisher-location":"Cham","reference-count":51,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031928048","type":"print"},{"value":"9783031928055","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-92805-5_1","type":"book-chapter","created":{"date-parts":[[2025,5,22]],"date-time":"2025-05-22T12:58:39Z","timestamp":1747918719000},"page":"1-17","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["On the\u00a0Application of\u00a0Egocentric Computer Vision to\u00a0Industrial Inspection"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9350-5259","authenticated-orcid":false,"given":"Vivek","family":"Chavan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7310-7528","authenticated-orcid":false,"given":"Oliver","family":"Heimann","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5138-0793","authenticated-orcid":false,"given":"J\u00f6rg","family":"Kr\u00fcger","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"1_CR1","unstructured":"Apple: Apple vision pro (2024). https:\/\/www.apple.com\/apple-vision-pro\/. Accessed 10 May 2024"},{"key":"1_CR2","doi-asserted-by":"publisher","unstructured":"Bergmann, P., Fauser, M., Sattlegger, D., Steger, C.: MVTec AD - a comprehensive real-world dataset for unsupervised anomaly detection. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 9584\u20139592 (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00982","DOI":"10.1109\/CVPR.2019.00982"},{"key":"1_CR3","doi-asserted-by":"publisher","unstructured":"Brown, T.B., et al.: Language models are few-shot learners (2020). https:\/\/doi.org\/10.48550\/ARXIV.2005.14165. https:\/\/arxiv.org\/abs\/2005.14165","DOI":"10.48550\/ARXIV.2005.14165"},{"key":"1_CR4","doi-asserted-by":"publisher","unstructured":"Chai, J., Zeng, H., Li, A., Ngai, E.W.: Deep learning in computer vision: a critical review of emerging techniques and application scenarios. Mach. Learn. Appl. 6, 100134 (2021). https:\/\/doi.org\/10.1016\/j.mlwa.2021.100134. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S2666827021000670","DOI":"10.1016\/j.mlwa.2021.100134"},{"key":"1_CR5","doi-asserted-by":"publisher","unstructured":"Chandola, V., Banerjee, A., Kumar, V.: Anomaly detection: a survey. ACM Comput. Surv. 41(3) (2009). https:\/\/doi.org\/10.1145\/1541880.1541882","DOI":"10.1145\/1541880.1541882"},{"key":"1_CR6","doi-asserted-by":"crossref","unstructured":"Chavan, V., Koch, P., Schl\u00fcter, M., Briese, C., Kr\u00fcger, J.: Active data collection and management for real-world continual learning via pretrained oracle. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, pp. 4085\u20134096, June 2024","DOI":"10.1109\/CVPRW63382.2024.00412"},{"key":"1_CR7","doi-asserted-by":"crossref","unstructured":"Chavan, V., Koch, P., Schl\u00fcter, M., Briese, C.: Towards realistic evaluation of industrial continual learning scenarios with an emphasis on energy consumption and computational footprint. In: Proceedings of the International Conference on Computer Vision (ICCV) (2023)","DOI":"10.1109\/ICCV51070.2023.01057"},{"key":"1_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"753","DOI":"10.1007\/978-3-030-01225-0_44","volume-title":"Computer Vision \u2013 ECCV 2018","author":"D Damen","year":"2018","unstructured":"Damen, D., et al.: Scaling egocentric vision: the Epic-Kitchens dataset. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11208, pp. 753\u2013771. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01225-0_44"},{"key":"1_CR9","doi-asserted-by":"publisher","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255 (2009). https:\/\/doi.org\/10.1109\/CVPR.2009.5206848","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1_CR10","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"issue":"2","key":"1_CR11","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1109\/MRA.2006.1638022","volume":"13","author":"H Durrant-Whyte","year":"2006","unstructured":"Durrant-Whyte, H., Bailey, T.: Simultaneous localization and mapping: Part I. IEEE Rob. Autom. Mag. 13(2), 99\u2013110 (2006). https:\/\/doi.org\/10.1109\/MRA.2006.1638022","journal-title":"IEEE Rob. Autom. Mag."},{"key":"1_CR12","unstructured":"Engel, J., et al.: Project Aria: a new tool for egocentric multi-modal AI research (2023). https:\/\/arxiv.org\/abs\/2308.13561"},{"key":"1_CR13","unstructured":"European Commission: Data protection (2023). https:\/\/commission.europa.eu\/law\/law-topic\/data-protection_en. Accessed 28 July 2023"},{"key":"1_CR14","unstructured":"European Commission: Industry 5.0 (2023). https:\/\/research-and-innovation.ec.europa.eu\/research-area\/industrial-research-and-innovation\/industry-50_en. Accessed 28 July 2023"},{"key":"1_CR15","unstructured":"Facebook: Facebook to acquire oculus (2014). https:\/\/about.fb.com\/news\/2014\/03\/facebook-to-acquire-oculus\/. Accessed 10 May 2024"},{"key":"1_CR16","unstructured":"Facebook Research: Introduction to project aria docs (2023). https:\/\/facebookresearch.github.io\/projectaria_tools\/docs\/intro. Accessed 24 July 2023"},{"key":"1_CR17","unstructured":"Goodfellow, I., Bengio, Y., Courville, A., Bengio, Y.: Deep Learning, vol.\u00a01. MIT Press (2016)"},{"key":"1_CR18","unstructured":"Grauman, K., et al.: Ego4D: around the world in 3,000 hours of egocentric video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 18995\u201319012, June 2022"},{"key":"1_CR19","unstructured":"Grauman, K., et al.: Ego-Exo4D: understanding skilled human activity from first- and third-person perspectives (2024)"},{"key":"1_CR20","doi-asserted-by":"publisher","unstructured":"Haffner, O., Ku\u010dera, E., Rosinov\u00e1, D.: Applications of machine learning and computer vision in industry 4.0. Appl. Sci. 14(6) (2024). https:\/\/doi.org\/10.3390\/app14062431. https:\/\/www.mdpi.com\/2076-3417\/14\/6\/2431","DOI":"10.3390\/app14062431"},{"key":"1_CR21","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1_CR22","doi-asserted-by":"crossref","unstructured":"Huang, S., Chen, Y., Jia, J., Wang, L.: Multi-view transformer for 3D visual grounding. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15524\u201315533 (2022)","DOI":"10.1109\/CVPR52688.2022.01508"},{"key":"1_CR23","doi-asserted-by":"publisher","unstructured":"Jocher, G., et al.: ultralytics\/YOLOv5: v7.0 - YOLOv5 SOTA Realtime Instance Segmentation (v7.0), December 2022. https:\/\/doi.org\/10.5281\/zenodo.7347926","DOI":"10.5281\/zenodo.7347926"},{"issue":"9","key":"1_CR24","doi-asserted-by":"publisher","first-page":"697","DOI":"10.1037\/0003-066X.58.9.697","volume":"58","author":"D Kahneman","year":"2003","unstructured":"Kahneman, D.: A perspective on judgment and choice: mapping bounded rationality. Am. Psychol. 58(9), 697\u2013720 (2003). https:\/\/doi.org\/10.1037\/0003-066X.58.9.697. pMID: 14584987","journal-title":"Am. Psychol."},{"key":"1_CR25","doi-asserted-by":"crossref","unstructured":"Kerbl, B., Kopanas, G., Leimk\u00fchler, T., Drettakis, G.: 3D gaussian splatting for real-time radiance field rendering (2023). https:\/\/arxiv.org\/abs\/2308.04079","DOI":"10.1145\/3592433"},{"key":"1_CR26","doi-asserted-by":"publisher","unstructured":"Kim, D., et al.: Eyes are faster than hands: a soft wearable robot learns user intention from the egocentric view. Sci. Rob. 4(26), eaav2949 (2019). https:\/\/doi.org\/10.1126\/scirobotics.aav2949. https:\/\/www.science.org\/doi\/abs\/10.1126\/scirobotics.aav2949","DOI":"10.1126\/scirobotics.aav2949"},{"key":"1_CR27","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al.: Segment anything. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 4015\u20134026, October 2023","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"1_CR28","doi-asserted-by":"crossref","unstructured":"Koch, S., et al.: ABC: a big cad model dataset for geometric deep learning. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2019","DOI":"10.1109\/CVPR.2019.00983"},{"key":"1_CR29","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. In: Pereira, F., Burges, C., Bottou, L., Weinberger, K. (eds.) Advances in Neural Information Processing Systems, vol.\u00a025. Curran Associates, Inc. (2012). https:\/\/proceedings.neurips.cc\/paper\/2012\/file\/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf"},{"key":"1_CR30","unstructured":"Lecun, Y., Bengio, Y.: Convolutional networks for images, speech, and time-series (1995)"},{"issue":"7553","key":"1_CR31","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436\u2013444 (2015)","journal-title":"Nature"},{"key":"1_CR32","doi-asserted-by":"publisher","unstructured":"Lee, Y.J., Ghosh, J., Grauman, K.: Discovering important people and objects for egocentric video summarization. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 1346\u20131353 (2012). https:\/\/doi.org\/10.1109\/CVPR.2012.6247820","DOI":"10.1109\/CVPR.2012.6247820"},{"key":"1_CR33","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows (2021). https:\/\/arxiv.org\/abs\/2103.14030","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"1_CR34","doi-asserted-by":"publisher","unstructured":"Malamas, E.N., Petrakis, E.G., Zervakis, M., Petit, L., Legat, J.D.: A survey on industrial vision systems, applications and tools. Image Vis. Comput. 21(2), 171\u2013188 (2003). https:\/\/doi.org\/10.1016\/S0262-8856(02)00152-X. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S026288560200152X","DOI":"10.1016\/S0262-8856(02)00152-X"},{"issue":"11","key":"1_CR35","doi-asserted-by":"publisher","first-page":"2123","DOI":"10.1109\/5.726784","volume":"86","author":"S Mann","year":"1998","unstructured":"Mann, S.: Humanistic computing: \u201cwearcomp\u2019\u2019 as a new framework and application for intelligent signal processing. Proc. IEEE 86(11), 2123\u20132151 (1998). https:\/\/doi.org\/10.1109\/5.726784","journal-title":"Proc. IEEE"},{"key":"1_CR36","doi-asserted-by":"publisher","unstructured":"Mazzei, D., Ramjattan, R.: Machine learning for industry 4.0: a systematic review using deep learning-based topic modelling. Sensors 22(22) (2022). https:\/\/doi.org\/10.3390\/s22228641. https:\/\/www.mdpi.com\/1424-8220\/22\/22\/8641","DOI":"10.3390\/s22228641"},{"key":"1_CR37","unstructured":"Oquab, M., et al.: DINOv2: learning robust visual features without supervision (2023)"},{"key":"1_CR38","unstructured":"PaddlePaddle: Paddleocr documentation (2023). https:\/\/paddlepaddle.github.io\/PaddleOCR. Accessed 28 July 2024"},{"key":"1_CR39","unstructured":"Parthasarathy, N., Eslami, S.M.A., Carreira, J., Henaff, O.J.: Self-supervised video pretraining yields strong image representations (2023). https:\/\/openreview.net\/forum?id=8onXkaNWLHA"},{"key":"1_CR40","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision (2021). https:\/\/arxiv.org\/abs\/2103.00020"},{"key":"1_CR41","unstructured":"Radford, A., Kim, J.W., Xu, T., Brockman, G., McLeavey, C., Sutskever, I.: Robust speech recognition via large-scale weak supervision (2022). https:\/\/arxiv.org\/abs\/2212.04356"},{"key":"1_CR42","unstructured":"Raina, N., et al.: EgoBlur: responsible innovation in aria (2023)"},{"key":"1_CR43","doi-asserted-by":"crossref","unstructured":"Ronen, R., Tsiper, S., Anschel, O., Lavi, I., Markovitz, A., Manmatha, R.: GLASS: global to local attention for scene-text spotting. arXiv preprint arXiv:2208.03364 (2022)","DOI":"10.1007\/978-3-031-19815-1_15"},{"key":"1_CR44","doi-asserted-by":"publisher","unstructured":"Schuhmann, C., et al.: LAION-5B: an open large-scale dataset for training next generation image-text models (2022). https:\/\/doi.org\/10.48550\/ARXIV.2210.08402. https:\/\/arxiv.org\/abs\/2210.08402","DOI":"10.48550\/ARXIV.2210.08402"},{"key":"1_CR45","doi-asserted-by":"crossref","unstructured":"Sener, F., et al.: Assembly101: a large-scale multi-view video dataset for understanding procedural activities. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.02042"},{"key":"1_CR46","doi-asserted-by":"publisher","unstructured":"Smith, M.L., Smith, L.N., Hansen, M.F.: The quiet revolution in machine vision - a state-of-the-art survey paper, including historical review, perspectives, and future directions. Comput. Ind. 130, 103472 (2021) https:\/\/doi.org\/10.1016\/j.compind.2021.103472. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0166361521000798","DOI":"10.1016\/j.compind.2021.103472"},{"key":"1_CR47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-34372-9","volume-title":"Computer Vision: Algorithms and Applications","author":"R Szeliski","year":"2010","unstructured":"Szeliski, R.: Computer Vision: Algorithms and Applications, 1st edn. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-030-34372-9","edition":"1"},{"key":"1_CR48","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., Jegou, H.: Training data-efficient image transformers & distillation through attention. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a0139, pp. 10347\u201310357. PMLR, 18\u201324 July 2021. https:\/\/proceedings.mlr.press\/v139\/touvron21a.html"},{"key":"1_CR49","doi-asserted-by":"publisher","unstructured":"Touvron, H., et al.: LLaMA: open and efficient foundation language models (2023). https:\/\/doi.org\/10.48550\/ARXIV.2302.13971. https:\/\/arxiv.org\/abs\/2302.13971","DOI":"10.48550\/ARXIV.2302.13971"},{"key":"1_CR50","unstructured":"Vaswani, A., et al.: Attention is all You need. In: Guyon, I., et al. (eds.) Advances in Neural Information Processing Systems, vol.\u00a030. Curran Associates, Inc. (2017). https:\/\/proceedings.neurips.cc\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"1_CR51","unstructured":"Zhang, J., Huang, J., Jin, S., Lu, S.: Vision-language models for vision tasks: a survey (2024). https:\/\/arxiv.org\/abs\/2304.00685"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-92805-5_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,22]],"date-time":"2025-05-22T12:59:05Z","timestamp":1747918745000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-92805-5_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031928048","9783031928055"],"references-count":51,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-92805-5_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}