{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T07:53:40Z","timestamp":1776153220710,"version":"3.50.1"},"reference-count":38,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,10,1]],"date-time":"2026-10-01T00:00:00Z","timestamp":1790812800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100018557","name":"Science and Technology Project of Nantong City","doi-asserted-by":"publisher","award":["JC22022061"],"award-info":[{"award-number":["JC22022061"]}],"id":[{"id":"10.13039\/501100018557","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100018557","name":"Science and Technology Project of Nantong City","doi-asserted-by":"publisher","award":["JC2025090"],"award-info":[{"award-number":["JC2025090"]}],"id":[{"id":"10.13039\/501100018557","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100016806","name":"Natural Science Foundation of Nantong Municipality","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100016806","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62002179"],"award-info":[{"award-number":["62002179"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,10]]},"DOI":"10.1016\/j.patcog.2026.113492","type":"journal-article","created":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T23:32:00Z","timestamp":1773531120000},"page":"113492","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["GRU-Enhanced multimodal crowd counting via modal-specific attention"],"prefix":"10.1016","volume":"178","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5113-6296","authenticated-orcid":false,"given":"Jin","family":"Wang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2878-8686","authenticated-orcid":false,"given":"Hongyang","family":"Fu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6732-2567","authenticated-orcid":false,"given":"Yang","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7600-365X","authenticated-orcid":false,"given":"Yingchuan","family":"Zhao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3180-7347","authenticated-orcid":false,"given":"Weiping","family":"Ding","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7826-0422","authenticated-orcid":false,"given":"Jie","family":"Wan","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2026.113492_bib0001","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.122753","article-title":"CAGNet: Coordinated attention guidance network for RGB-T crowd counting","volume":"243","author":"Yang","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.patcog.2026.113492_bib0002","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109585","article-title":"Crowd counting from single images using recursive multi-pathway zooming and foreground enhancement","volume":"141","author":"Ma","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113492_bib0003","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.111073","article-title":"Perspective-assisted prototype-based learning for semi-supervised crowd counting","volume":"158","author":"Qian","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113492_bib0004","series-title":"Proceedings of the Conference on Computer Vision and Pattern Recognition","first-page":"4821","article-title":"Cross-modal collaborative representation learning and a large-scale RGBT benchmark for crowd counting","author":"Liu","year":"2021"},{"key":"10.1016\/j.patcog.2026.113492_bib0005","series-title":"Proceedings of the Asian Conference on Computer Vision","first-page":"497","article-title":"RGB-T Crowd counting from drone: a benchmark and MMCCN network","author":"Peng","year":"2021"},{"key":"10.1016\/j.patcog.2026.113492_bib0006","series-title":"Proceedings of the Conference on Computer Vision and Pattern Recognition","first-page":"1821","article-title":"Density map regression guided detection network for RGB-D crowd counting and localization","author":"Lian","year":"2019"},{"key":"10.1016\/j.patcog.2026.113492_bib0007","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2025.113449","article-title":"Feature correction and semantic guidance for multimodal crowd counting","volume":"181","author":"Wang","year":"2025","journal-title":"Appl. Soft Comput."},{"issue":"5","key":"10.1016\/j.patcog.2026.113492_bib0008","doi-asserted-by":"crossref","first-page":"4156","DOI":"10.1109\/TITS.2023.3321328","article-title":"MC3Net: multimodality cross-guided compensation coordination network for RGB-T crowd counting","volume":"25","author":"Zhou","year":"2024","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"10.1016\/j.patcog.2026.113492_bib0009","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2024.112126","article-title":"MAGNet: multi-scale awareness and global fusion network for RGB-D salient object detection","volume":"299","author":"Zhong","year":"2024","journal-title":"Knowl. Based Syst."},{"key":"10.1016\/j.patcog.2026.113492_bib0010","series-title":"Proceedings of the European Conference on Computer Vision","first-page":"428","article-title":"Improving point-based crowd counting and localization based on auxiliary point guidance","author":"Chen","year":"2024"},{"key":"10.1016\/j.patcog.2026.113492_bib0011","series-title":"Proceedings of the International Conference on Computer Vision","first-page":"12284","article-title":"Video individual counting for moving drones","author":"Fan","year":"2025"},{"key":"10.1016\/j.patcog.2026.113492_bib0012","doi-asserted-by":"crossref","first-page":"2750","DOI":"10.1109\/TIP.2025.3555116","article-title":"Scale-aware crowd counting network with annotation error modeling","volume":"34","author":"Hsieh","year":"2025","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.patcog.2026.113492_bib0013","doi-asserted-by":"crossref","first-page":"2671","DOI":"10.1007\/s00371-022-02485-3","article-title":"CCST: crowd counting with swin transformer","volume":"39","author":"Li","year":"2023","journal-title":"Vis. Comput."},{"key":"10.1016\/j.patcog.2026.113492_bib0014","series-title":"Proceedings of the International Conference on Pattern Recognition and Artificial Intelligence","first-page":"412","article-title":"Joint contextual transformer and multi-scale information shared network for crowd counting","author":"Zeng","year":"2022"},{"key":"10.1016\/j.patcog.2026.113492_bib0015","doi-asserted-by":"crossref","DOI":"10.1016\/j.image.2022.116915","article-title":"Dilated high-resolution network driven RGB-T multimodal crowd counting","volume":"112","author":"Liu","year":"2023","journal-title":"Signal Process. Image Commun."},{"key":"10.1016\/j.patcog.2026.113492_bib0016","doi-asserted-by":"crossref","first-page":"2593","DOI":"10.1109\/TMM.2025.3535330","article-title":"MISF-Net: modality-Invariant and specific fusion network for RGB-T crowd counting","volume":"27","author":"Mu","year":"2025","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.patcog.2026.113492_bib0017","doi-asserted-by":"crossref","first-page":"24540","DOI":"10.1109\/TITS.2022.3203385","article-title":"DEFNet: dual-branch enhanced feature fusion network for RGB-T crowd counting","volume":"23","author":"Zhou","year":"2022","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"10.1016\/j.patcog.2026.113492_bib0018","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.112522","article-title":"Multi-modal mamba framework for RGB-T crowd counting with linear complexity","volume":"172","author":"Gan","year":"2026","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113492_bib0019","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2022.119038","article-title":"CSA-Net: cross-modal scale-aware attention-aggregated network for RGB-T crowd counting","volume":"213","author":"Li","year":"2023","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.patcog.2026.113492_bib0020","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.112768","article-title":"Asymmetric modal fusion for multi-modal crowd counting","volume":"172","author":"Wang","year":"2026","journal-title":"Pattern Recognit."},{"issue":"2","key":"10.1016\/j.patcog.2026.113492_bib0021","doi-asserted-by":"crossref","first-page":"652","DOI":"10.1109\/TPAMI.2019.2938758","article-title":"Res2Net: a new multi-scale backbone architecture","volume":"43","author":"Gao","year":"2021","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"3","key":"10.1016\/j.patcog.2026.113492_bib0022","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","article-title":"ImageNet large scale visual recognition challenge","volume":"115","author":"Russakovsky","year":"2015","journal-title":"Int. J. Comput. Vis."},{"issue":"8","key":"10.1016\/j.patcog.2026.113492_bib0023","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","article-title":"Long short-term memory","volume":"9","author":"Hochreiter","year":"1997","journal-title":"Neural Comput."},{"key":"10.1016\/j.patcog.2026.113492_bib0024","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2021.107102","article-title":"Efficient activity recognition using lightweight CNN and DS-GRU network for surveillance applications","volume":"103","author":"Ullah","year":"2021","journal-title":"Appl. Soft Comput."},{"issue":"14","key":"10.1016\/j.patcog.2026.113492_bib0025","doi-asserted-by":"crossref","first-page":"12338","DOI":"10.1109\/JIOT.2021.3135512","article-title":"SafeDriving: an effective abnormal driving behavior detection system based on EMG signals","volume":"9","author":"Fan","year":"2022","journal-title":"IEEE Internet Things J."},{"key":"10.1016\/j.patcog.2026.113492_bib0026","series-title":"Proceedings of the International Joint Conference on Neural Networks","first-page":"1","article-title":"Multiple object tracking with GRU association and Kalman prediction","author":"Lit","year":"2021"},{"key":"10.1016\/j.patcog.2026.113492_bib0027","series-title":"Proceedings of the European Conference on Computer Vision","first-page":"275","article-title":"BBS-Net: RGB-D Salient object detection with a bifurcated backbone strategy network","author":"Fan","year":"2020"},{"key":"10.1016\/j.patcog.2026.113492_bib0028","series-title":"Proceedings of the Conference on Computer Vision and Pattern Recognition","first-page":"6835","article-title":"Correlation-guided attention for corner detection based visual tracking","author":"Du","year":"2020"},{"key":"10.1016\/j.patcog.2026.113492_bib0029","series-title":"Proceedings of the European Conference on Computer Vision","first-page":"235","article-title":"Hierarchical dynamic filtering network for RGB-D salient object detection","author":"Pang","year":"2020"},{"key":"10.1016\/j.patcog.2026.113492_bib0030","series-title":"Proceedings of the International Conference on Computer Vision","first-page":"6141","article-title":"Bayesian loss for crowd count estimation with point supervision","author":"Ma","year":"2019"},{"key":"10.1016\/j.patcog.2026.113492_bib0031","doi-asserted-by":"crossref","DOI":"10.1016\/j.imavis.2022.104592","article-title":"A cross-modal crowd counting method combining CNN and cross-modal transformer","volume":"129","author":"Zhang","year":"2023","journal-title":"Image Vis. Comput."},{"key":"10.1016\/j.patcog.2026.113492_bib0032","series-title":"Proceedings of the International Symposium on Circuits and Systems","first-page":"3299","article-title":"TAFNet: a three-stream adaptive fusion network for RGB-T crowd counting","author":"Tang","year":"2022"},{"key":"10.1016\/j.patcog.2026.113492_bib0033","first-page":"1","article-title":"Graph enhancement and transformer aggregation network for RGB-thermal crowd counting","volume":"21","author":"Pan","year":"2024","journal-title":"IEEE Geosci. Remote Sens. Lett."},{"key":"10.1016\/j.patcog.2026.113492_bib0034","series-title":"Proceedings of the British Machine Vision Conference","article-title":"Multi-modal crowd counting via modal emulation","author":"Wang","year":"2024"},{"key":"10.1016\/j.patcog.2026.113492_bib0035","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2023.106885","article-title":"CGINet: cross-modality grade interaction network for RGB-T crowd counting","volume":"126","author":"Pan","year":"2023","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.patcog.2026.113492_bib0036","series-title":"Proceedings of the Conference on Computer Vision and Pattern Recognition","first-page":"1091","article-title":"CSRNet: dilated convolutional neural networks for understanding the highly congested scenes","author":"Li","year":"2018"},{"key":"10.1016\/j.patcog.2026.113492_bib0037","series-title":"Proceedings of the European Conference on Computer Vision","first-page":"757","article-title":"Scale aggregation network for accurate and efficient crowd counting","author":"Cao","year":"2018"},{"key":"10.1016\/j.patcog.2026.113492_bib0038","series-title":"Proceedings of the Conference on Computer Vision and Pattern Recognition","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He","year":"2016"}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326004589?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326004589?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T07:05:23Z","timestamp":1776150323000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326004589"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,10]]},"references-count":38,"alternative-id":["S0031320326004589"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113492","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,10]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"GRU-Enhanced multimodal crowd counting via modal-specific attention","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113492","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"113492"}}