{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T17:33:53Z","timestamp":1769016833583,"version":"3.49.0"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T00:00:00Z","timestamp":1750809600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T00:00:00Z","timestamp":1750809600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,25]]},"DOI":"10.1109\/dsp65409.2025.11075122","type":"proceedings-article","created":{"date-parts":[[2025,7,15]],"date-time":"2025-07-15T17:41:05Z","timestamp":1752601265000},"page":"1-5","source":"Crossref","is-referenced-by-count":1,"title":["Advances in Electrical Grid Assets Inspection: Exploring Multimodal Large Language Models"],"prefix":"10.1109","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-8184-7779","authenticated-orcid":false,"given":"Pedro Daniel","family":"Rocha","sequence":"first","affiliation":[{"name":"University of Coimbra,Instituto de Telecomunica&#x00E7;&#x00F5;es and Dept. of Elect. and Comp. Eng.,Coimbra,Portugal"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3641-0186","authenticated-orcid":false,"given":"Fernando","family":"Lopes","sequence":"additional","affiliation":[{"name":"Instituto de Telecomunica&#x00E7;&#x00F5;es and Polytechnic Institute of Coimbra, Coimbra Institute of Engineering,Coimbra,Portugal"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1141-4404","authenticated-orcid":false,"given":"Lu\u00eds A.","family":"Da Silva Cruz","sequence":"additional","affiliation":[{"name":"University of Coimbra,Instituto de Telecomunica&#x00E7;&#x00F5;es and Dept. of Elect. and Comp. Eng.,Coimbra,Portugal"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-319-95831-6","volume-title":"Cognitive Biases in Visualizations","author":"Ellis","year":"2018"},{"key":"ref2","doi-asserted-by":"crossref","DOI":"10.2139\/ssrn.4700751","article-title":"Beyond AI Exposure: Which Tasks are Cost-Effective to Automate with Computer Vision?","volume-title":"SSRN Electronic Journal","author":"Svanberg","year":"2024"},{"key":"ref3","volume-title":"Deep Learning is Robust to Massive Label Noise","author":"Rolnick","year":"2017"},{"key":"ref4","first-page":"33 173","article-title":"Opportunities and Challenges in Data-Centric AI","volume-title":"IEEE Access","volume":"12","author":"Kumar","year":"2024"},{"key":"ref5","volume-title":"Andrew Ng: Unbiggen AI"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/DSP58604.2023.10167879"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/DSP58604.2023.10167960"},{"key":"ref8","volume-title":"Insulator Defect Detection","author":"Lewis","year":"2021"},{"issue":"11","key":"ref9","first-page":"2278","article-title":"Gradient-based learning applied to document recognition","volume-title":"Proceedings of the IEEE","volume":"86","author":"Lecun","year":"1998"},{"key":"ref10","article-title":"Imagenet classification with deep convolutional neural networks","volume-title":"Advances in Neural Information Processing Systems","volume":"25","author":"Krizhevsky","year":"2012"},{"key":"ref11","volume-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition","author":"Simonyan","year":"2015"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/CVPR.2015.7298594","article-title":"Going deeper with convolutions","volume-title":"2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Szegedy","year":"2015"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"ref15","volume-title":"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks","author":"Tan","year":"2020"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"ref17","volume-title":"Fundamentals of Recurrent Neural Network (RNN) and Long Short-Term Memory (LSTM) Network","author":"Sherstinsky","year":"2023"},{"key":"ref18","volume-title":"Attention Is All You Need","author":"Vaswani","year":"2017"},{"key":"ref19","author":"Shaw","year":"2018","journal-title":"Self-Attention with Relative Position Representations"},{"key":"ref20","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding","author":"Devlin","year":"2018"},{"key":"ref21","volume-title":"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale","author":"Dosovitskiy","year":"2020"},{"key":"ref22","volume-title":"Training data-efficient image transformers &amp; distillation through attention","author":"Touvron","year":"2020"},{"key":"ref23","volume-title":"Swin Transformer: Hierarchical Vision Transformer using Shifted Windows","author":"Liu","year":"2021"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3506283"},{"key":"ref25","volume-title":"Learning Transferable Visual Models From Natural Language Supervision","author":"Radford","year":"2021"},{"key":"ref26","doi-asserted-by":"crossref","DOI":"10.1109\/ICCV51070.2023.00371","volume-title":"Segment Anything","author":"Kirillov","year":"2023"},{"key":"ref27","volume-title":"Qwen-VL: A Versatile Vision-Language Model for Understanding, Localization, Text Reading, and Beyond","author":"Bai","year":"2023"},{"key":"ref28","volume-title":"GPT-4V(ision) technical work and authors"},{"key":"ref29","volume-title":"The Llama 3 Herd of Models","year":"2024"},{"key":"ref30","volume-title":"Meta llama 3.2-vision model information"},{"key":"ref31","volume-title":"PaliGemma 2: A Family of Versatile VLMs for Transfer","author":"Steiner","year":"2024"},{"key":"ref32","article-title":"Florence-VL: Enhancing Vision-Language Models with Generative Vision Encoder and Depth-Breadth Fusion","volume-title":"arXiv preprint","author":"Chen","year":"2024"},{"key":"ref33","volume-title":"LoRA: Low-Rank Adaptation of Large Language Models","author":"Hu","year":"2021"},{"key":"ref34","doi-asserted-by":"crossref","first-page":"347","DOI":"10.1109\/SmartGridComm60555.2024.10738034","article-title":"Towards Automated Visual Inspection of Electrical Grid Assets for the Smart Grid - An Application to HV Insulators","volume-title":"2024 IEEE International Conference on Communications, Control, and Computing Technologies for Smart Grids (SmartGridComm)","author":"Lopes","year":"2024"},{"key":"ref35","volume-title":"EPRI Insulator Defect Image Dataset - (Roboflow)","author":"Rocha","year":"2025"},{"key":"ref36","article-title":"Automating electrical grid asset inspection using deep vision: Evaluating the impact of annotation detail level","author":"Rocha","journal-title":"Submited"}],"event":{"name":"2025 25th International Conference on Digital Signal Processing (DSP)","location":"Pylos (Messinia, Southwest Peloponnese), Greece","start":{"date-parts":[[2025,6,25]]},"end":{"date-parts":[[2025,6,27]]}},"container-title":["2025 25th International Conference on Digital Signal Processing (DSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11074775\/11074780\/11075122.pdf?arnumber=11075122","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,16]],"date-time":"2025-07-16T05:36:04Z","timestamp":1752644164000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11075122\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,25]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/dsp65409.2025.11075122","relation":{},"subject":[],"published":{"date-parts":[[2025,6,25]]}}}