{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T22:58:40Z","timestamp":1780786720446,"version":"3.54.1"},"reference-count":55,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100011160","name":"Beihang University State Key Laboratory of Virtual Reality Technology and Systems","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100011160","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Digital Signal Processing"],"published-print":{"date-parts":[[2026,10]]},"DOI":"10.1016\/j.dsp.2026.106298","type":"journal-article","created":{"date-parts":[[2026,5,30]],"date-time":"2026-05-30T06:34:07Z","timestamp":1780122847000},"page":"106298","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Fine-grained adaptive distillation framework for cross-modal hashing retrieval"],"prefix":"10.1016","volume":"182","author":[{"given":"Weiyi","family":"Shi","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3438-7456","authenticated-orcid":false,"given":"Lei","family":"Yun","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xinsheng","family":"Shu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"issue":"3","key":"10.1016\/j.dsp.2026.106298_bib0001","first-page":"3877","article-title":"Unsupervised contrastive cross-modal hashing","volume":"45","author":"Hu","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.dsp.2026.106298_bib0002","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2023.101968","article-title":"When CLIP meets cross-modal hashing retrieval: a new strong baseline","volume":"100","author":"Xia","year":"2023","journal-title":"Inf. Fusion"},{"issue":"7","key":"10.1016\/j.dsp.2026.106298_bib0003","doi-asserted-by":"crossref","first-page":"3439","DOI":"10.3390\/s23073439","article-title":"Clip-based adaptive graph attention network for large-scale unsupervised multi-modal hashing retrieval","volume":"23","author":"Li","year":"2023","journal-title":"Sensors"},{"key":"10.1016\/j.dsp.2026.106298_bib0004","series-title":"2023 IEEE International Conference on Multimedia and Expo (ICME)","first-page":"126","article-title":"Deep unsupervised momentum contrastive hashing for cross-modal retrieval","author":"Lu","year":"2023"},{"issue":"10","key":"10.1016\/j.dsp.2026.106298_bib0005","doi-asserted-by":"crossref","first-page":"7255","DOI":"10.1109\/TCSVT.2022.3172716","article-title":"Deep adaptively-enhanced hashing with discriminative similarity guidance for unsupervised cross-modal retrieval","volume":"32","author":"Shi","year":"2022","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.dsp.2026.106298_bib0006","series-title":"International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.dsp.2026.106298_bib0007","series-title":"Proceedings of NAACL-HLT","first-page":"2","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","volume":"vol. 1","author":"Kenton","year":"2019"},{"key":"10.1016\/j.dsp.2026.106298_bib0008","unstructured":"A. Dosovitskiy, L. Beyer, A. Kolesnikov, D. Weissenborn, X. Zhai, T. Unterthiner, M. Dehghani, M. Minderer, G. Heigold, S. Gelly, et al., An image is worth 16x16 words: Transformers for image recognition at scale,(2020). arXiv preprint arXiv: 2010.11929."},{"key":"10.1016\/j.dsp.2026.106298_bib0009","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"10012","article-title":"Swin transformer: hierarchical vision transformer using shifted windows","author":"Liu","year":"2021"},{"key":"10.1016\/j.dsp.2026.106298_bib0010","unstructured":"X. An, J. Deng, K. Yang, J. Li, Z. Feng, J. Guo, J. Yang, T. Liu, Unicom: universal and compact representation learning for image retrieval,(2023). arXiv preprint arXiv: 2304.05884."},{"key":"10.1016\/j.dsp.2026.106298_bib0011","doi-asserted-by":"crossref","unstructured":"N. Reimers, Sentence-BERT: sentence embeddings using siamese BERT-networks,(2019). arXiv preprint arXiv: 1908.10084.","DOI":"10.18653\/v1\/D19-1410"},{"key":"10.1016\/j.dsp.2026.106298_bib0012","unstructured":"G. Hinton, O. Vinyals, J. Dean, Distilling the knowledge in a neural network,(2015). arXiv preprint arXiv: 1503.02531."},{"key":"10.1016\/j.dsp.2026.106298_bib0013","doi-asserted-by":"crossref","first-page":"6530","DOI":"10.1109\/TCSVT.2024.3350695","article-title":"CKDH: CLIP-based knowledge distillation hashing for cross-modal retrieval","volume":"34","author":"Li","year":"2024","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.dsp.2026.106298_bib0014","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"3123","article-title":"Creating something from nothing: unsupervised knowledge distillation for cross-modal hashing","author":"Hu","year":"2020"},{"key":"10.1016\/j.dsp.2026.106298_bib0015","series-title":"2024 IEEE 40th International Conference on Data Engineering (ICDE)","first-page":"4699","article-title":"Unsupervised multimodal graph contrastive semantic anchor space dynamic knowledge distillation network for cross-media hash retrieval","author":"Yu","year":"2024"},{"issue":"7","key":"10.1016\/j.dsp.2026.106298_bib0016","doi-asserted-by":"crossref","first-page":"3529","DOI":"10.1109\/TCSVT.2023.3234037","article-title":"Unsupervised hashing retrieval via efficient correlation distillation","volume":"33","author":"Xi","year":"2023","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"6","key":"10.1016\/j.dsp.2026.106298_bib0017","doi-asserted-by":"crossref","first-page":"1789","DOI":"10.1007\/s11263-021-01453-z","article-title":"Knowledge distillation: a survey","volume":"129","author":"Gou","year":"2021","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.dsp.2026.106298_bib0018","series-title":"Proceedings of the ACM on Web Conference 2024","first-page":"1395","article-title":"Bit-mask robust contrastive knowledge distillation for unsupervised semantic hashing","author":"He","year":"2024"},{"key":"10.1016\/j.dsp.2026.106298_bib0019","series-title":"Proceedings of the 29th ACM International Conference on Multimedia","first-page":"1517","article-title":"Joint-teaching: learning to refine knowledge for resource-constrained unsupervised cross-modal retrieval","author":"Zhang","year":"2021"},{"key":"10.1016\/j.dsp.2026.106298_bib0020","series-title":"Proceedings of the 2021 International Conference on Multimedia Retrieval","first-page":"183","article-title":"Unsupervised deep cross-modal hashing by knowledge distillation for large-scale cross-modal retrieval","author":"Li","year":"2021"},{"key":"10.1016\/j.dsp.2026.106298_bib0021","doi-asserted-by":"crossref","first-page":"9109","DOI":"10.1109\/TMM.2024.3385986","article-title":"Unsupervised dual hashing coding (UDC) on semantic tagging and sample content for cross-modal retrieval","volume":"26","author":"Cai","year":"2024","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.dsp.2026.106298_bib0022","series-title":"International Conference on Neural Information Processing","first-page":"497","article-title":"Relation-guided dual hash network for unsupervised cross-modal retrieval","author":"Zheng","year":"2022"},{"key":"10.1016\/j.dsp.2026.106298_bib0023","series-title":"Proceedings of the Asian Conference on Computer Vision","first-page":"4665","article-title":"Heterogeneous interactive learning network for unsupervised cross-modal retrieval","author":"Zheng","year":"2022"},{"issue":"6","key":"10.1016\/j.dsp.2026.106298_bib0024","doi-asserted-by":"crossref","first-page":"375","DOI":"10.1007\/s00530-024-01539-x","article-title":"Dark knowledge association guided hashing for unsupervised cross-modal retrieval","volume":"30","author":"Kang","year":"2024","journal-title":"Multimed. Syst."},{"key":"10.1016\/j.dsp.2026.106298_bib0025","series-title":"Proceedings of the 25th ACM International Conference on Multimedia","first-page":"1398","article-title":"Deep binary reconstruction for cross-modal hashing","author":"Li","year":"2017"},{"issue":"9","key":"10.1016\/j.dsp.2026.106298_bib0026","doi-asserted-by":"crossref","first-page":"1404","DOI":"10.1109\/TMM.2015.2455415","article-title":"Learning compact hash codes for multimodal representations using orthogonal deep structure","volume":"17","author":"Wang","year":"2015","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.dsp.2026.106298_bib0027","series-title":"IJCAI","first-page":"5","article-title":"Unsupervised deep hashing via binary latent factor models for large-scale cross-modal retrieval","volume":"vol. 1","author":"Wu","year":"2018"},{"key":"10.1016\/j.dsp.2026.106298_bib0028","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","article-title":"Unsupervised generative adversarial cross-modal hashing","volume":"vol. 32","author":"Zhang","year":"2018"},{"key":"10.1016\/j.dsp.2026.106298_bib0029","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"176","article-title":"Coupled cyclegan: unsupervised hashing network for cross-modal retrieval","volume":"vol. 33","author":"Li","year":"2019"},{"issue":"4","key":"10.1016\/j.dsp.2026.106298_bib0030","doi-asserted-by":"crossref","first-page":"1602","DOI":"10.1109\/TIP.2018.2878970","article-title":"Cycle-consistent deep generative hashing for cross-modal retrieval","volume":"28","author":"Wu","year":"2018","journal-title":"IEEE Trans. Image Process."},{"issue":"1","key":"10.1016\/j.dsp.2026.106298_bib0031","doi-asserted-by":"crossref","first-page":"174","DOI":"10.1109\/TMM.2019.2922128","article-title":"Multi-pathway generative adversarial hashing for unsupervised cross-modal retrieval","volume":"22","author":"Zhang","year":"2019","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.dsp.2026.106298_bib0032","series-title":"Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval","first-page":"1379","article-title":"Joint-modal distribution-based similarity hashing for large-scale unsupervised deep cross-modal retrieval","author":"Liu","year":"2020"},{"key":"10.1016\/j.dsp.2026.106298_bib0033","first-page":"4626","article-title":"Deep graph-neighbor coherence preserving network for unsupervised cross-modal hashing","volume":"35","author":"Yu","year":"2021","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"10.1016\/j.dsp.2026.106298_bib0034","doi-asserted-by":"crossref","first-page":"8838","DOI":"10.1109\/TKDE.2022.3218656","article-title":"Work together: correlation-identity reconstruction hashing for unsupervised cross-modal retrieval","volume":"35","author":"Zhu","year":"2022","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"10.1016\/j.dsp.2026.106298_bib0035","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"3027","article-title":"Deep joint-semantics reconstructing hashing for large-scale unsupervised cross-modal retrieval","author":"Su","year":"2019"},{"key":"10.1016\/j.dsp.2026.106298_bib0036","doi-asserted-by":"crossref","first-page":"466","DOI":"10.1109\/TMM.2021.3053766","article-title":"Aggregation-based graph convolutional hashing for unsupervised cross-modal retrieval","volume":"24","author":"Zhang","year":"2021","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.dsp.2026.106298_bib0037","series-title":"IJCAI","first-page":"4569","article-title":"Dual semantic fusion hashing for multi-label cross-modal retrieval","author":"Liu","year":"2024"},{"key":"10.1016\/j.dsp.2026.106298_bib0038","series-title":"BMVC","first-page":"1035","article-title":"Hugs are better than handshakes: unsupervised cross-modal transformer hashing with multi-granularity alignment","author":"Wang","year":"2022"},{"key":"10.1016\/j.dsp.2026.106298_bib0039","series-title":"IJCAI","first-page":"623","article-title":"Enhancing cross-modal retrieval via visual-textual prompt hashing","author":"Chen","year":"2024"},{"key":"10.1016\/j.dsp.2026.106298_bib0040","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s13042-024-02154-y","article-title":"Unsupervised deep hashing with multiple similarity preservation for cross-modal image-text retrieval","volume":"15","author":"Xiong","year":"2024","journal-title":"Int. J. Mach. Learn. Cybern."},{"key":"10.1016\/j.dsp.2026.106298_bib0041","first-page":"1","article-title":"Hugs bring double benefits: unsupervised cross-modal hashing with multi-granularity aligned transformers","volume":"132","author":"Wang","year":"2024","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.dsp.2026.106298_bib0042","series-title":"Findings of the Association for Computational Linguistics: ACL 2024","first-page":"15890","article-title":"Visual in-context learning for large vision-language models","author":"Zhou","year":"2024"},{"key":"10.1016\/j.dsp.2026.106298_bib0043","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"19732","article-title":"Fine-grained distillation for long document retrieval","volume":"vol. 38","author":"Zhou","year":"2024"},{"key":"10.1016\/j.dsp.2026.106298_bib0044","series-title":"Findings of the Association for Computational Linguistics: ACL 2023","first-page":"5387","article-title":"Towards robust ranker for text retrieval","author":"Zhou","year":"2023"},{"key":"10.1016\/j.dsp.2026.106298_bib0045","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"26307","article-title":"Linguistic-aware patch slimming framework for fine-grained cross-modal alignment","author":"Fu","year":"2024"},{"key":"10.1016\/j.dsp.2026.106298_bib0046","series-title":"Proceedings of the 2022 International Conference on Multimedia Retrieval","first-page":"631","article-title":"MSSPQ: multiple semantic structure-preserving quantization for cross-modal retrieval","author":"Zhu","year":"2022"},{"key":"10.1016\/j.dsp.2026.106298_bib0047","series-title":"Proceedings of the 31st ACM International Conference on Multimedia","first-page":"893","article-title":"Multi-granularity interactive transformer hashing for cross-modal retrieval","author":"Liu","year":"2023"},{"key":"10.1016\/j.dsp.2026.106298_bib0048","series-title":"Proceedings of the 1st ACM International Conference on Multimedia Information Retrieval","first-page":"39","article-title":"The mir flickr retrieval evaluation","author":"Huiskes","year":"2008"},{"key":"10.1016\/j.dsp.2026.106298_bib0049","series-title":"Proceedings of the ACM International Conference on Image and Video Retrieval","first-page":"1","article-title":"Nus-wide: a real-world web image database from national university of singapore","author":"Chua","year":"2009"},{"issue":"4","key":"10.1016\/j.dsp.2026.106298_bib0050","doi-asserted-by":"crossref","first-page":"419","DOI":"10.1016\/j.cviu.2009.03.008","article-title":"The segmented and annotated IAPR TC-12 benchmark","volume":"114","author":"Escalante","year":"2010","journal-title":"Comput. Vis. Image Underst."},{"key":"10.1016\/j.dsp.2026.106298_bib0051","series-title":"Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6\u201312, 2014, Proceedings, Part V 13","first-page":"740","article-title":"Microsoft COCO: common objects in context","author":"Lin","year":"2014"},{"issue":"2","key":"10.1016\/j.dsp.2026.106298_bib0052","doi-asserted-by":"crossref","first-page":"560","DOI":"10.1109\/TKDE.2020.2987312","article-title":"Deep cross-modal hashing with hashing functions and unified hash codes jointly learning","volume":"34","author":"Tu","year":"2020","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"10.1016\/j.dsp.2026.106298_bib0053","doi-asserted-by":"crossref","first-page":"662","DOI":"10.1109\/TMM.2021.3129623","article-title":"Semi-supervised knowledge distillation for cross-modal hashing","volume":"25","author":"Su","year":"2021","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.dsp.2026.106298_bib0054","series-title":"International Conference on Neural Information Processing","first-page":"93","article-title":"CCUH: CLIP-based clustering method for unsupervised hashing multi-modal retrieval","author":"Zhu","year":"2024"},{"issue":"1","key":"10.1016\/j.dsp.2026.106298_bib0055","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1007\/s13735-025-00353-z","article-title":"Dual-matrix guided reconstruction hashing for unsupervised cross-modal retrieval","volume":"14","author":"Lin","year":"2025","journal-title":"Int. J. Multimed. Inf. Retr."}],"container-title":["Digital Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1051200426004161?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1051200426004161?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T21:58:57Z","timestamp":1780783137000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1051200426004161"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,10]]},"references-count":55,"alternative-id":["S1051200426004161"],"URL":"https:\/\/doi.org\/10.1016\/j.dsp.2026.106298","relation":{},"ISSN":["1051-2004"],"issn-type":[{"value":"1051-2004","type":"print"}],"subject":[],"published":{"date-parts":[[2026,10]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Fine-grained adaptive distillation framework for cross-modal hashing retrieval","name":"articletitle","label":"Article Title"},{"value":"Digital Signal Processing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.dsp.2026.106298","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Inc. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"106298"}}