{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,3]],"date-time":"2025-07-03T15:15:44Z","timestamp":1751555744719,"version":"3.41.0"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T00:00:00Z","timestamp":1748131200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T00:00:00Z","timestamp":1748131200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100020950","name":"National Science and Technology Council","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100020950","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,25]]},"DOI":"10.1109\/iscas56072.2025.11043866","type":"proceedings-article","created":{"date-parts":[[2025,6,27]],"date-time":"2025-06-27T17:42:19Z","timestamp":1751046139000},"page":"1-5","source":"Crossref","is-referenced-by-count":1,"title":["Global-Local Similarity for Efficient Fine-Grained Image Recognition with Vision Transformers"],"prefix":"10.1109","author":[{"given":"Edwin Arkel","family":"Rios","sequence":"first","affiliation":[{"name":"National Yang Ming Chiao Tung University,Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Min-Chun","family":"Hu","sequence":"additional","affiliation":[{"name":"National Tsing Hua University,Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo-Cheng","family":"Lai","sequence":"additional","affiliation":[{"name":"National Yang Ming Chiao Tung University,Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298658"},{"first-page":"8","article-title":"The Caltech-UCSD Birds-200-2011 Dataset","author":"Wah","key":"ref2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3126648"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6822"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_26"},{"journal-title":"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale","year":"2020","author":"Dosovitskiy","key":"ref6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19967"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.5244\/C.35.192"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747591"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00106"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475561"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3248791"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.385"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00465"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.04.037"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2023.3241969"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.476"},{"journal-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding","year":"2019","author":"Devlin","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"article-title":"Layer Normalization","year":"2016","author":"Ba","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00914"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00833"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.7016"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-33676-9_5"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i2.16176"},{"journal-title":"Training data-efficient image transformers & distillation through attention","year":"2021","author":"Touvron","key":"ref26"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3244340"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2024.104923"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2023.3312645"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01240-3_4"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00515"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548308"},{"article-title":"Novel Dataset for Fine-Grained Image Categorization","volume-title":"First Workshop on Fine-Grained Visual Categorization, IEEE Conference on Computer Vision and Pattern Recognition","author":"Khosla","key":"ref33"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2016.90"}],"event":{"name":"2025 IEEE International Symposium on Circuits and Systems (ISCAS)","start":{"date-parts":[[2025,5,25]]},"location":"London, United Kingdom","end":{"date-parts":[[2025,5,28]]}},"container-title":["2025 IEEE International Symposium on Circuits and Systems (ISCAS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11043142\/11042930\/11043866.pdf?arnumber=11043866","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,28]],"date-time":"2025-06-28T07:00:23Z","timestamp":1751094023000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11043866\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,25]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/iscas56072.2025.11043866","relation":{},"subject":[],"published":{"date-parts":[[2025,5,25]]}}}