{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,22]],"date-time":"2026-07-22T10:47:56Z","timestamp":1784717276953,"version":"3.55.0"},"reference-count":111,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62006112"],"award-info":[{"award-number":["62006112"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61921006"],"award-info":[{"award-number":["61921006"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62176117"],"award-info":[{"award-number":["62176117"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"NSF of Jiangsu Province","award":["BK20200313"],"award-info":[{"award-number":["BK20200313"]}]},{"name":"Nanjing University-Huawei Joint Research Program"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2023,2,1]]},"DOI":"10.1109\/tpami.2022.3160328","type":"journal-article","created":{"date-parts":[[2022,3,17]],"date-time":"2022-03-17T20:31:51Z","timestamp":1647549111000},"page":"1817-1834","source":"Crossref","is-referenced-by-count":23,"title":["Generalized Knowledge Distillation via Relationship Matching"],"prefix":"10.1109","volume":"45","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1173-1880","authenticated-orcid":false,"given":"Han-Jia","family":"Ye","sequence":"first","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Su","family":"Lu","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"De-Chuan","family":"Zhan","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/1150402.1150464"},{"key":"ref3","article-title":"Distilling the knowledge in a neural network","author":"Hinton","year":"2015"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.754"},{"key":"ref5","article-title":"FitNets: Hints for thin deep nets","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Romero","year":"2015"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00302"},{"key":"ref7","first-page":"1602","article-title":"Born-again neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Furlanello","year":"2018"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00297"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58574-7_42"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00241"},{"key":"ref11","first-page":"9782","article-title":"DynaBERT: Dynamic BERT with adaptive width and depth","volume-title":"Neural Inf. Process. Syst.","author":"Hou","year":"2020"},{"key":"ref12","first-page":"5776","article-title":"MiniLM: Deep self-attention distillation for task-agnostic compression of pre-trained transformers","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Wang","year":"2020"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01103"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01465"},{"key":"ref15","article-title":"Dataset distillation","author":"Wang","year":"2018"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00938"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5963"},{"key":"ref18","first-page":"4743","article-title":"Zero-shot knowledge distillation in deep networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Nayak","year":"2019"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00489"},{"key":"ref20","article-title":"Dataset condensation with gradient matching","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zhao","year":"2021"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00454"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00152"},{"key":"ref23","article-title":"M2KD: Multi-model and multi-level knowledge distillation for incremental learning","author":"Zhou","year":"2019"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-20876-9_1"},{"key":"ref25","article-title":"Learning to cluster in order to transfer across domains and tasks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hsu","year":"2018"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ITA.2018.8503149"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.434"},{"key":"ref28","first-page":"2859","article-title":"Sampling matters in deep embedding learning","volume-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit.","author":"Manmatha","year":"2017"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s11704-016-6906-3"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-016-5594-4"},{"key":"ref32","first-page":"574","article-title":"Hypothesis transfer learning via transformation functions","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Du","year":"2017"},{"key":"ref33","first-page":"2830","article-title":"Explicit inductive bias for transfer learning with convolutional networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Li","year":"2018"},{"key":"ref34","first-page":"4730","article-title":"Knowledge transfer with jacobian matching","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Srinivas","year":"2018"},{"key":"ref35","first-page":"1904","article-title":"Rectify heterogeneous models with semantic mapping","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ye","year":"2018"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2994749"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2009.06.042"},{"key":"ref38","first-page":"2023","article-title":"Learning using privileged information: Similarity control and knowledge transfer","volume-title":"J. Mach. Learn. Res.","volume":"16","author":"Vapnik","year":"2015"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-33395-3_1"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2004.11"},{"key":"ref41","first-page":"5142","article-title":"Towards understanding knowledge distillation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Phuong","year":"2019"},{"key":"ref42","article-title":"A closer look at deep learning heuristics: Learning rate restarts, warmup and distillation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Gotmare","year":"2019"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58529-7_16"},{"key":"ref44","article-title":"Label refinery: Improving imagenet classification through label progression","author":"Bagherinezhad","year":"2018"},{"key":"ref45","article-title":"Deep model compression: Distilling knowledge from noisy teachers","author":"Sau","year":"2016"},{"key":"ref46","article-title":"Paying more attention to attention: Improving the performance of convolutional neural networks via attention transfer","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zagoruyko","year":"2017"},{"key":"ref47","first-page":"4281","article-title":"Sobolev training for neural networks","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Czarnecki","year":"2017"},{"key":"ref48","first-page":"3509","article-title":"LIT: Learned intermediate representation training for model compression","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Koratana","year":"2019"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01231-1_21"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58610-2_2"},{"key":"ref51","first-page":"8935","article-title":"Residual distillation: Towards portable deep neural networks without shortcuts","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Li","year":"2020"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2773081"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-020-2900-x"},{"key":"ref54","first-page":"6840","article-title":"Heterogeneous model reuse via optimizing multiparty multiclass margin","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wu","year":"2019"},{"key":"ref55","first-page":"3630","article-title":"Matching networks for one shot learning","author":"Vinyals","year":"2016","journal-title":"Proc. Neural Inf. Process. Syst."},{"key":"ref56","first-page":"4080","article-title":"Prototypical networks for few-shot learning","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Snell","year":"2017"},{"key":"ref57","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Finn","year":"2017"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00610"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00883"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-020-3055-1"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475306"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.587"},{"key":"ref63","first-page":"207","article-title":"Distance metric learning for large margin nearest neighbor classification","volume":"10","author":"Weinberger","year":"2009","journal-title":"J. Mach. Learn. Res."},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2829192"},{"key":"ref66","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","volume":"119","author":"Chen","year":"2020","journal-title":"Proc. Int. Conf. Mach. Learn."},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2901675"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273523"},{"key":"ref69","first-page":"2579","article-title":"Visualizing data using T-SNE","volume":"9","author":"Maaten","year":"2008","journal-title":"J. Mach. Learn. Res."},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP.2012.6349720"},{"key":"ref71","first-page":"1472","article-title":"Multiview triplet embedding: Learning attributes in multiple maps","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Amid","year":"2015"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2970494"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00813"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01603"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00201"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00511"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00145"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00726"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00319"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN48605.2020.9207148"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00914"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00409"},{"key":"ref83","first-page":"12980","article-title":"Compress: Self-supervised learning by compressing representations","author":"Koohpayegani","year":"2020","journal-title":"Proc. Adv. Neural Inf. Process. Syst."},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00947"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58545-7_34"},{"key":"ref86","article-title":"SEED: Self-supervised distillation for visual representation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Fang","year":"2021"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46478-7_31"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00208"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.83"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00636"},{"key":"ref91","article-title":"Feature matters: A stage-by-stage approach for knowledge transfer","author":"Gao","year":"2018"},{"key":"ref92","article-title":"The Caltech-UCSD Birds-200-2011 Dataset","author":"Wah","year":"2011"},{"key":"ref93","article-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky","year":"2009"},{"key":"ref94","first-page":"719","article-title":"TADAM: Task dependent adaptive metric for improved few-shot learning","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Oreshkin","year":"2018"},{"key":"ref95","article-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications","author":"Howard","year":"2017"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.5244\/C.30.87"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00742"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58529-7_39"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00396"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref101","article-title":"Novel dataset for fine-grained image categorization","volume-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit. First Workshop Fine-Grained Vis. Categorization","author":"Khosla","year":"2011"},{"key":"ref102","article-title":"Like what you like: Knowledge distill via neuron selectivity transfer","author":"Huang","year":"2017"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1611835114"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.328"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-020-01381-4"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01322"},{"key":"ref107","article-title":"Decoupling representation and classifier for long-tailed recognition","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kang","year":"2020"},{"key":"ref108","article-title":"Optimization as a model for few-shot learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Ravi","year":"2017"},{"key":"ref109","first-page":"448","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ioffe","year":"2015"},{"key":"ref110","article-title":"Simpleshot: Revisiting nearest-neighbor classification for few-shot learning","author":"Wang","year":"2019"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_26"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10008914\/09737403.pdf?arnumber=9737403","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T19:25:02Z","timestamp":1743794702000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9737403\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,1]]},"references-count":111,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2022.3160328","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,2,1]]}}}