{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T17:50:58Z","timestamp":1772905858667,"version":"3.50.1"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,27]],"date-time":"2024-05-27T00:00:00Z","timestamp":1716768000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,27]],"date-time":"2024-05-27T00:00:00Z","timestamp":1716768000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["SCH-2123749,SCH-2123521"],"award-info":[{"award-number":["SCH-2123749,SCH-2123521"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,27]]},"DOI":"10.1109\/fg59268.2024.10581969","type":"proceedings-article","created":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T17:40:08Z","timestamp":1720719608000},"page":"1-5","source":"Crossref","is-referenced-by-count":10,"title":["Adaptive Cross-Architecture Mutual Knowledge Distillation"],"prefix":"10.1109","author":[{"given":"Jianyuan","family":"Ni","sequence":"first","affiliation":[{"name":"Texas State University,USA"}]},{"given":"Hao","family":"Tang","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University,USA"}]},{"given":"Yuzhang","family":"Shang","sequence":"additional","affiliation":[{"name":"Illinois Institute of Technology,USA"}]},{"given":"Bin","family":"Duan","sequence":"additional","affiliation":[{"name":"Illinois Institute of Technology,USA"}]},{"given":"Yan","family":"Yan","sequence":"additional","affiliation":[{"name":"Illinois Institute of Technology,USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6229"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5746"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00497"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00489"},{"key":"ref5","article-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020","journal-title":"arXiv preprint"},{"key":"ref6","first-page":"2286","article-title":"Convit: Improving vision transformers with soft convolutional inductive biases","volume-title":"ICML","author":"Dascoli","year":"2021"},{"key":"ref7","article-title":"Residual knowledge distillation","author":"Gao","year":"2020","journal-title":"arXiv preprint"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.4140\/TCP.n.2015.249"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00143"},{"key":"ref11","author":"Krizhevsky","year":"2009","journal-title":"Learning multiple layers of features from tiny images"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01214"},{"key":"ref14","article-title":"Cross-architecture knowledge distillation","author":"Liu","year":"2022","journal-title":"arXiv preprint"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.07.048"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref17","article-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2017","journal-title":"arXiv preprint"},{"key":"ref18","article-title":"Ensemble distribution distillation","author":"Malinin","year":"2019","journal-title":"arXiv preprint"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5963"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00409"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01627"},{"key":"ref22","article-title":"Fitnets: Hints for thin deep nets","author":"Romero","year":"2014","journal-title":"arXiv preprint"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref24","article-title":"Deep model compression: Distilling knowledge from noisy teachers","author":"Sau","year":"2016","journal-title":"arXiv preprint"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19775-8_37"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00926"},{"key":"ref27","article-title":"Multilingual neural machine translation with knowledge distillation","author":"Tan","year":"2019","journal-title":"arXiv preprint"},{"key":"ref28","article-title":"Contrastive representation distillation","author":"Tian","year":"2019","journal-title":"arXiv preprint"},{"key":"ref29","first-page":"10347","article-title":"Training data-efficient image transformers & distillation through attention","volume-title":"ICML","author":"Touvron","year":"2021"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref31","article-title":"Pay attention to features, transfer learn faster cnns","volume-title":"ICLR","author":"Wang","year":"2019"},{"key":"ref32","first-page":"5776","article-title":"Minilm: Deep self-attention distillation for task-agnostic compression of pretrained transformers","volume":"33","author":"Wang","year":"2020","journal-title":"NeurIPS"},{"key":"ref33","first-page":"28522","article-title":"Vitae: Vision transformer advanced by exploring intrinsic inductive bias","volume":"34","author":"Xu","year":"2021","journal-title":"NeurIPS"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098135"},{"key":"ref35","article-title":"Paying more attention to attention: Improving the performance of convolutional neural networks via attention transfer","author":"Zagoruyko","year":"2016","journal-title":"arXiv preprint"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00381"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00454"},{"key":"ref38","article-title":"Knowledge distillation by on-the-fly native ensemble","volume":"31","author":"Zhu","year":"2018","journal-title":"NeurIPS"}],"event":{"name":"2024 IEEE 18th International Conference on Automatic Face and Gesture Recognition (FG)","location":"Istanbul, Turkiye","start":{"date-parts":[[2024,5,27]]},"end":{"date-parts":[[2024,5,31]]}},"container-title":["2024 IEEE 18th International Conference on Automatic Face and Gesture Recognition (FG)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10581880\/10581860\/10581969.pdf?arnumber=10581969","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,18]],"date-time":"2024-07-18T05:06:29Z","timestamp":1721279189000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10581969\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,27]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/fg59268.2024.10581969","relation":{},"subject":[],"published":{"date-parts":[[2024,5,27]]}}}