{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T01:57:19Z","timestamp":1773971839191,"version":"3.50.1"},"reference-count":72,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62372478"],"award-info":[{"award-number":["62372478"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62472165"],"award-info":[{"award-number":["62472165"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neural Networks"],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1016\/j.neunet.2025.108518","type":"journal-article","created":{"date-parts":[[2025,12,27]],"date-time":"2025-12-27T00:05:23Z","timestamp":1766793923000},"page":"108518","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["CILF-CIAE: CLIP-driven image-language fusion for correcting inverse age estimation"],"prefix":"10.1016","volume":"197","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3270-6238","authenticated-orcid":false,"given":"Yuntao","family":"Shou","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9787-2002","authenticated-orcid":false,"given":"Tao","family":"Meng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9047-7977","authenticated-orcid":false,"given":"Wei","family":"Ai","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0177-8514","authenticated-orcid":false,"given":"Nan","family":"Yin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5224-4048","authenticated-orcid":false,"given":"Keqin","family":"Li","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.neunet.2025.108518_bib0001","series-title":"Proceedings of the IEEE international conference on computer vision","first-page":"1643","article-title":"Anchored regression networks applied to age estimation and super resolution","author":"Agustsson","year":"2017"},{"key":"10.1016\/j.neunet.2025.108518_bib0002","doi-asserted-by":"crossref","first-page":"23716","DOI":"10.52202\/068431-1723","article-title":"Flamingo: A visual language model for few-shot learning","volume":"35","author":"Alayrac","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2025.108518_bib0003","doi-asserted-by":"crossref","first-page":"86","DOI":"10.1016\/j.neucom.2023.02.037","article-title":"Deep domain-invariant learning for facial age estimation","volume":"534","author":"Bao","year":"2023","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neunet.2025.108518_bib0004","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1109\/TIFS.2022.3218431","article-title":"Divergence-driven consistency training for semi-supervised facial age estimation","volume":"18","author":"Bao","year":"2022","journal-title":"IEEE Transactions on Information Forensics and Security"},{"key":"10.1016\/j.neunet.2025.108518_bib0005","series-title":"Biometric recognition: 7th chinese conference, CCBR 2012, guangzhou, china, december 4\u20135, 2012. proceedings 7","first-page":"324","article-title":"Human age estimation using ranking svm","author":"Cao","year":"2012"},{"key":"10.1016\/j.neunet.2025.108518_bib0006","doi-asserted-by":"crossref","first-page":"325","DOI":"10.1016\/j.patrec.2020.11.008","article-title":"Rank consistent ordinal regression for neural networks with application to age estimation","volume":"140","author":"Cao","year":"2020","journal-title":"Pattern Recognition Letters"},{"issue":"3","key":"10.1016\/j.neunet.2025.108518_bib0007","doi-asserted-by":"crossref","first-page":"785","DOI":"10.1109\/TIP.2014.2387379","article-title":"A learning framework for age rank estimation based on face images with scattering transform","volume":"24","author":"Chang","year":"2015","journal-title":"IEEE Transactions on Image Processing"},{"key":"10.1016\/j.neunet.2025.108518_bib0008","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"13984","article-title":"Label distribution learning on auxiliary label space graphs for facial expression recognition","author":"Chen","year":"2020"},{"issue":"8","key":"10.1016\/j.neunet.2025.108518_bib0009","doi-asserted-by":"crossref","first-page":"2209","DOI":"10.1109\/TMM.2017.2786869","article-title":"Deep age estimation: From classification to ranking","volume":"20","author":"Chen","year":"2017","journal-title":"IEEE Transactions on Multimedia"},{"key":"10.1016\/j.neunet.2025.108518_bib0010","doi-asserted-by":"crossref","first-page":"164","DOI":"10.1016\/j.isprsjprs.2025.05.008","article-title":"Profit: A prompt-guided frequency-aware filtering and template-enhanced interaction framework for hyperspectral video tracking","volume":"226","author":"Chen","year":"2025","journal-title":"ISPRS Journal of Photogrammetry and Remote Sensing"},{"key":"10.1016\/j.neunet.2025.108518_bib0011","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2024.102658","article-title":"Ssttrack: A unified hyperspectral video tracking framework via modeling spectral-spatial-temporal conditions","volume":"114","author":"Chen","year":"2025","journal-title":"Information Fusion"},{"key":"10.1016\/j.neunet.2025.108518_bib0012","series-title":"International conference on learning representations","article-title":"Rethinking attention with performers","author":"Choromanski","year":"2021"},{"key":"10.1016\/j.neunet.2025.108518_bib0013","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"10503","article-title":"Pml: Progressive margin loss for long-tailed age classification","author":"Deng","year":"2021"},{"key":"10.1016\/j.neunet.2025.108518_bib0014","series-title":"European conference on computer vision","first-page":"1","article-title":"Teach clip to develop a number sense for ordinal regression","author":"Du","year":"2024"},{"issue":"3","key":"10.1016\/j.neunet.2025.108518_bib0015","doi-asserted-by":"crossref","first-page":"758","DOI":"10.1109\/TIFS.2017.2766583","article-title":"An ensemble CNN2ELM for age estimation","volume":"13","author":"Duan","year":"2017","journal-title":"IEEE Transactions on Information Forensics and Security"},{"key":"10.1016\/j.neunet.2025.108518_bib0016","series-title":"Proceedings of the IEEE\/CVF winter conference on applications of computer vision","first-page":"2402","article-title":"Facial expression recognition in the wild via deep attentive center loss","author":"Farzaneh","year":"2021"},{"issue":"6","key":"10.1016\/j.neunet.2025.108518_bib0017","doi-asserted-by":"crossref","first-page":"2825","DOI":"10.1109\/TIP.2017.2689998","article-title":"Deep label distribution learning with label ambiguity","volume":"26","author":"Gao","year":"2017","journal-title":"IEEE Transactions on Image Processing"},{"issue":"1","key":"10.1016\/j.neunet.2025.108518_bib0018","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1109\/TPAMI.2022.3152247","article-title":"A survey on vision transformer","volume":"45","author":"Han","year":"2022","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.1016\/j.neunet.2025.108518_bib0019","unstructured":"Jin, W., Mukherjee, S., Cheng, Y., Shen, Y., Chen, W., Awadallah, A. H., Jose, D., & Ren, X. (2023). Grill: Grounded vision-language pre-training via aligning text and image regions. arXiv preprint arXiv: 2305.14676."},{"key":"10.1016\/j.neunet.2025.108518_bib0020","series-title":"37th conference on neural information processing systems (neurIPS)","article-title":"Physics-driven ML-based modelling for correcting inverse estimation","author":"Kang","year":"2023"},{"issue":"10s","key":"10.1016\/j.neunet.2025.108518_bib0021","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3505244","article-title":"Transformers in vision: A survey","volume":"54","author":"Khan","year":"2022","journal-title":"ACM Computing Surveys (CSUR)"},{"key":"10.1016\/j.neunet.2025.108518_bib0022","first-page":"1008","article-title":"Uniclip: Unified framework for contrastive language-image pre-training","volume":"35","author":"Lee","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2025.108518_bib0023","series-title":"Proceedings of the 2022 conference of the north american chapter of the association for computational linguistics: Human language technologies","article-title":"Fnet: Mixing tokens with fourier transforms","author":"Lee-Thorp","year":"2022"},{"key":"10.1016\/j.neunet.2025.108518_bib0024","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition workshops","first-page":"34","article-title":"Age and gender classification using convolutional neural networks","author":"Levi","year":"2015"},{"key":"10.1016\/j.neunet.2025.108518_bib0025","doi-asserted-by":"crossref","first-page":"2016","DOI":"10.1109\/TIP.2021.3049955","article-title":"Adaptively learning facial expression representation via cf labels and distillation","volume":"30","author":"Li","year":"2021","journal-title":"IEEE Transactions on Image Processing"},{"key":"10.1016\/j.neunet.2025.108518_bib0026","series-title":"International conference on machine learning","first-page":"19730","article-title":"Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models","author":"Li","year":"2023"},{"key":"10.1016\/j.neunet.2025.108518_bib0027","series-title":"International conference on machine learning","first-page":"12888","article-title":"Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation","author":"Li","year":"2022"},{"key":"10.1016\/j.neunet.2025.108518_bib0028","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"1145","article-title":"Bridgenet: A continuity-aware probabilistic network for age estimation","author":"Li","year":"2019"},{"key":"10.1016\/j.neunet.2025.108518_bib0029","first-page":"34892","article-title":"Visual instruction tuning","volume":"36","author":"Liu","year":"2023","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"7","key":"10.1016\/j.neunet.2025.108518_bib0030","doi-asserted-by":"crossref","first-page":"1808","DOI":"10.1109\/TMM.2020.2969793","article-title":"Similarity-aware and variational deep adversarial learning for robust facial age estimation","volume":"22","author":"Liu","year":"2020","journal-title":"IEEE Transactions on Multimedia"},{"issue":"2","key":"10.1016\/j.neunet.2025.108518_bib0031","doi-asserted-by":"crossref","first-page":"1236","DOI":"10.1109\/TAFFC.2021.3122146","article-title":"Facial expression recognition with visual transformers and attentional selective fusion","volume":"14","author":"Ma","year":"2021","journal-title":"IEEE Transactions on Affective Computing"},{"key":"10.1016\/j.neunet.2025.108518_bib0032","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110951","article-title":"Poster++: A simpler and stronger facial expression recognition network","volume":"157","author":"Mao","year":"2025","journal-title":"Pattern Recognition"},{"key":"10.1016\/j.neunet.2025.108518_bib0033","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"4920","article-title":"Ordinal regression with multiple output cnn for age estimation","author":"Niu","year":"2016"},{"key":"10.1016\/j.neunet.2025.108518_bib0034","series-title":"International conference on machine learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.neunet.2025.108518_bib0035","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"18082","article-title":"Denseclip: Language-guided dense prediction with context-aware prompting","author":"Rao","year":"2022"},{"issue":"2\u20134","key":"10.1016\/j.neunet.2025.108518_bib0036","doi-asserted-by":"crossref","first-page":"144","DOI":"10.1007\/s11263-016-0940-3","article-title":"Deep expectation of real and apparent age from a single image without facial landmarks","volume":"126","author":"Rothe","year":"2018","journal-title":"International Journal of Computer Vision"},{"key":"10.1016\/j.neunet.2025.108518_bib0037","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"7660","article-title":"Feature decomposition and reconstruction learning for effective facial expression recognition","author":"Ruan","year":"2021"},{"key":"10.1016\/j.neunet.2025.108518_bib0038","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"6248","article-title":"Dive into ambiguity: Latent distribution mining and pairwise uncertainty estimation for facial expression recognition","author":"She","year":"2021"},{"issue":"1","key":"10.1016\/j.neunet.2025.108518_bib0039","doi-asserted-by":"crossref","first-page":"1408","DOI":"10.1038\/s41598-022-05468-5","article-title":"Attention-guided deep learning for gestational age prediction using fetal brain MRI","volume":"12","author":"Shen","year":"2022","journal-title":"Scientific Reports"},{"issue":"2","key":"10.1016\/j.neunet.2025.108518_bib0040","doi-asserted-by":"crossref","first-page":"404","DOI":"10.1109\/TPAMI.2019.2937294","article-title":"Deep differentiable random forests for age estimation","volume":"43","author":"Shen","year":"2019","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.1016\/j.neunet.2025.108518_bib0041","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"2304","article-title":"Deep regression forests for age estimation","author":"Shen","year":"2018"},{"key":"10.1016\/j.neunet.2025.108518_bib0042","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"18760","article-title":"Moving window regression: A novel approach to ordinal regression","author":"Shin","year":"2022"},{"key":"10.1016\/j.neunet.2025.108518_bib0043","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110974","article-title":"Masked contrastive graph representation learning for age estimation","volume":"158","author":"Shou","year":"2025","journal-title":"Pattern Recognition"},{"key":"10.1016\/j.neunet.2025.108518_bib0044","unstructured":"Shou, Y., Cao, X., & Meng, D. (2023). Masked contrastive graph representation learning for age estimation. arXiv preprint arXiv: 2306.17798."},{"key":"10.1016\/j.neunet.2025.108518_bib0045","series-title":"Ijcai","first-page":"3548","article-title":"Deeply-learned hybrid representations for facial age estimation","author":"Tan","year":"2019"},{"key":"10.1016\/j.neunet.2025.108518_bib0046","series-title":"International conference on medical image computing and computer-assisted intervention","first-page":"36","article-title":"Medical transformer: Gated axial-attention for medical image segmentation","author":"Valanarasu","year":"2021"},{"key":"10.1016\/j.neunet.2025.108518_bib0047","doi-asserted-by":"crossref","first-page":"131988","DOI":"10.1109\/ACCESS.2020.3010018","article-title":"Pyramid with super resolution for in-the-wild facial expression recognition","volume":"8","author":"Vo","year":"2020","journal-title":"IEEE Access"},{"key":"10.1016\/j.neunet.2025.108518_bib0048","doi-asserted-by":"crossref","first-page":"1084","DOI":"10.1109\/TIP.2021.3139226","article-title":"Improving face-based age estimation with attention-based dynamic patch fusion","volume":"31","author":"Wang","year":"2022","journal-title":"IEEE Transactions on Image Processing"},{"key":"10.1016\/j.neunet.2025.108518_bib0049","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"6897","article-title":"Suppressing uncertainties for large-scale facial expression recognition","author":"Wang","year":"2020"},{"key":"10.1016\/j.neunet.2025.108518_bib0050","doi-asserted-by":"crossref","first-page":"4057","DOI":"10.1109\/TIP.2019.2956143","article-title":"Region attention networks for pose and occlusion robust facial expression recognition","volume":"29","author":"Wang","year":"2020","journal-title":"IEEE Transactions on Image Processing"},{"key":"10.1016\/j.neunet.2025.108518_bib0051","first-page":"76908","article-title":"Learning-to-rank meets language: Boosting language-driven ordering alignment for ordinal classification","volume":"36","author":"Wang","year":"2023","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2025.108518_bib0052","article-title":"Frequency-assisted mamba for remote sensing image super-resolution","author":"Xiao","year":"2024","journal-title":"IEEE Transactions on Multimedia"},{"key":"10.1016\/j.neunet.2025.108518_bib0053","doi-asserted-by":"crossref","first-page":"738","DOI":"10.1109\/TIP.2023.3349004","article-title":"Ttst: A top-k token selective transformer for remote sensing image super-resolution","volume":"33","author":"Xiao","year":"2024","journal-title":"IEEE Transactions on Image Processing"},{"issue":"6","key":"10.1016\/j.neunet.2025.108518_bib0054","doi-asserted-by":"crossref","first-page":"1263","DOI":"10.1109\/TCSVT.2015.2511543","article-title":"Hybrid CNN and dictionary-based models for scene recognition and domain adaptation","volume":"27","author":"Xie","year":"2015","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2025.108518_bib0055","unstructured":"Ye, Q., Xu, H., Xu, G., Ye, J., Yan, M., Zhou, Y., Wang, J., Hu, A., Shi, P., Shi, Y. et al. (2023). mplug-owl: Modularization empowers large language models with multimodality. arXiv preprint arXiv: 2304.14178."},{"key":"10.1016\/j.neunet.2025.108518_bib0056","series-title":"Proceedings of the 30th ACM international conference on multimedia","first-page":"3470","article-title":"Deal: An unsupervised domain adaptive framework for graph-level classification","author":"Yin","year":"2022"},{"key":"10.1016\/j.neunet.2025.108518_bib0057","unstructured":"Yin, N., Shen, L., Wang, M., Lan, L., Ma, Z., Chen, C., Hua, X.-S., & Luo, X. (2023a). Coco: A coupled contrastive framework for unsupervised domain adaptive graph classification. arXiv preprint arXiv: 2306.04979."},{"issue":"12","key":"10.1016\/j.neunet.2025.108518_bib0058","doi-asserted-by":"crossref","first-page":"12873","DOI":"10.1109\/TKDE.2023.3271677","article-title":"Omg: Towards effective graph classification against label noise","volume":"35","author":"Yin","year":"2023","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"10.1016\/j.neunet.2025.108518_bib0059","unstructured":"Yin, N., Shen, L., Xiong, H., Gu, B., Chen, C., Hua, X., Liu, S., & Luo, X. (5555). Messages are never propagated alone: Collaborative hypergraph neural network for time-series forecasting. IEEE Transactions on Pattern Analysis and Machine Intelligence, (01), 1\u201315. 10.1109\/TPAMI.2023.3331389."},{"key":"10.1016\/j.neunet.2025.108518_bib0060","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"20291","article-title":"Face2exp: Combating data biases for facial expression recognition","author":"Zeng","year":"2022"},{"key":"10.1016\/j.neunet.2025.108518_bib0061","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"11975","article-title":"Sigmoid loss for language image pre-training","author":"Zhai","year":"2023"},{"issue":"9","key":"10.1016\/j.neunet.2025.108518_bib0062","doi-asserted-by":"crossref","first-page":"3140","DOI":"10.1109\/TCSVT.2019.2936410","article-title":"Fine-grained age estimation in the wild with attention LSTM networks","volume":"30","author":"Zhang","year":"2019","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2025.108518_bib0063","first-page":"27061","article-title":"Point-m2ae: Multi-scale masked autoencoders for hierarchical point cloud pre-training","volume":"35","author":"Zhang","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2025.108518_bib0064","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"8552","article-title":"Pointclip: Point cloud understanding by clip","author":"Zhang","year":"2022"},{"key":"10.1016\/j.neunet.2025.108518_bib0065","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Liu, L., Li, C. et al. (2017). Quantifying facial age by posterior of age comparisons. arXiv preprint arXiv: 1708.09687.","DOI":"10.5244\/C.31.108"},{"key":"10.1016\/j.neunet.2025.108518_bib0066","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2025.126819","article-title":"Lra-gnn: Latent relation-aware graph neural network with initial and dynamic residual for facial age estimation","volume":"273","author":"Zhang","year":"2025","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.neunet.2025.108518_bib0067","series-title":"European conference on computer vision","first-page":"418","article-title":"Learn from all: Erasing attention consistency for noisy label facial expression recognition","author":"Zhang","year":"2022"},{"key":"10.1016\/j.neunet.2025.108518_bib0068","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"3146","article-title":"Poster: A pyramid cross-fusion transformer network for facial expression recognition","author":"Zheng","year":"2023"},{"key":"10.1016\/j.neunet.2025.108518_bib0069","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"16816","article-title":"Conditional prompt learning for vision-language models","author":"Zhou","year":"2022"},{"issue":"9","key":"10.1016\/j.neunet.2025.108518_bib0070","doi-asserted-by":"crossref","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","article-title":"Learning to prompt for vision-language models","volume":"130","author":"Zhou","year":"2022","journal-title":"International Journal of Computer Vision"},{"key":"10.1016\/j.neunet.2025.108518_bib0071","series-title":"International conference on machine learning","first-page":"42589","article-title":"Fourmer: An efficient global modeling paradigm for image restoration","author":"Zhou","year":"2023"},{"key":"10.1016\/j.neunet.2025.108518_bib0072","series-title":"12th international conference on learning representations, ICLR 2024","article-title":"Minigpt-4: Enhancing vision-language understanding with advanced large language models","author":"Zhu","year":"2024"}],"container-title":["Neural Networks"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608025013991?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608025013991?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T00:50:13Z","timestamp":1773967813000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0893608025013991"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5]]},"references-count":72,"alternative-id":["S0893608025013991"],"URL":"https:\/\/doi.org\/10.1016\/j.neunet.2025.108518","relation":{},"ISSN":["0893-6080"],"issn-type":[{"value":"0893-6080","type":"print"}],"subject":[],"published":{"date-parts":[[2026,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"CILF-CIAE: CLIP-driven image-language fusion for correcting inverse age estimation","name":"articletitle","label":"Article Title"},{"value":"Neural Networks","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neunet.2025.108518","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"108518"}}