{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:33:42Z","timestamp":1763192022120,"version":"3.45.0"},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100020950","name":"National Science and Technology Council","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100020950","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1109\/ijcnn64981.2025.11229357","type":"proceedings-article","created":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T18:46:15Z","timestamp":1763145975000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["Generalization-Aware Zero-Shot Neural Architecture Search for Self-Supervised Transformers"],"prefix":"10.1109","author":[{"given":"Jun-Hua","family":"Ko","sequence":"first","affiliation":[{"name":"National Taiwan University,Taipei,Taiwan"}]},{"given":"Tzi-Dar","family":"Chiueh","sequence":"additional","affiliation":[{"name":"National Taiwan University,Taipei,Taiwan"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Attention is all you need","author":"Vaswani","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00293"},{"key":"ref3","first-page":"24 254","article-title":"Litetransformersearch: Training-free neural architecture search for efficient language models","volume":"35","author":"Javaheripi","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01062"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3378781"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01426"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/AICAS59952.2024.10595905"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1017\/9781009025096.004"},{"key":"ref11","first-page":"2902","article-title":"Large-scale evolution of image classifiers","volume-title":"International Conference on Machine Learning","author":"Real"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58517-4_32"},{"article-title":"Once for all: Train one network and specialize it for efficient deployment","volume-title":"International Conference on Learning Representations","author":"Cai","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00040"},{"article-title":"Rethinking architecture selection in differentiable NAS","volume-title":"International Conference on Learning Representations","author":"Wang","key":"ref15"},{"key":"ref16","first-page":"7588","article-title":"Neural architecture search without training","volume-title":"International Conference on Machine Learning","author":"Mellor"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01145"},{"issue":"8","key":"ref18","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI blog"},{"article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","volume-title":"ICLR","author":"Dosovitskiy","key":"ref19"},{"key":"ref20","first-page":"10 347","article-title":"Training data-efficient image transformers & distillation through attention","volume-title":"International Conference on Machine Learning","volume":"139","author":"Touvron"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref22","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","volume-title":"International conference on machine learning","author":"Chen"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"ref24","first-page":"35 632","article-title":"Mcmae: Masked convolution meets masked autoencoders","volume":"35","author":"Gao","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"article-title":"Masked distillation advances self-supervised transformer architecture search","volume-title":"The Twelfth International Conference on Learning Representations","author":"Yan","key":"ref25"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3395423"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-short.73"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01403"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01205"},{"key":"ref30","first-page":"10 347","article-title":"Training data-efficient image transformers & distillation through attention","volume-title":"International conference on machine learning","author":"Touvron"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19803-8_9"},{"article-title":"Picking winning tickets before training by preserving gradient flow","volume-title":"International Conference on Learning Representations","author":"Wang","key":"ref32"},{"article-title":"Gradsign: Model performance inference with theoretical insights","year":"2022","author":"Zhang","key":"ref33"},{"article-title":"Knas: Green neural architecture search","volume-title":"Proceedings of ICML 2021","author":"Xu","key":"ref34"},{"article-title":"Zero-Cost Proxies for Lightweight NAS","volume-title":"International Conference on Learning Representations (ICLR)","author":"Abdelfattah","key":"ref35"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-018-1140-0"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01228-1_26"}],"event":{"name":"2025 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2025,6,30]]},"location":"Rome, Italy","end":{"date-parts":[[2025,7,5]]}},"container-title":["2025 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11227166\/11227148\/11229357.pdf?arnumber=11229357","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:31:02Z","timestamp":1763191862000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11229357\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/ijcnn64981.2025.11229357","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]}}}