{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T20:57:30Z","timestamp":1776113850017,"version":"3.50.1"},"reference-count":73,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key R&amp;D Program of China","award":["2024YFF0618403"],"award-info":[{"award-number":["2024YFF0618403"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62576216"],"award-info":[{"award-number":["62576216"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Guangdong-Macao Science and Technology Innovation Joint Fundation","award":["2024A0505090003"],"award-info":[{"award-number":["2024A0505090003"]}]},{"DOI":"10.13039\/501100003453","name":"Natural Science Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2024A1515010194"],"award-info":[{"award-number":["2024A1515010194"]}],"id":[{"id":"10.13039\/501100003453","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Guangdong Provincial Key Laboratory","award":["2023B1212060076"],"award-info":[{"award-number":["2023B1212060076"]}]},{"name":"Shenzhen Science and Technology Program","award":["JCYJ20220531101412030"],"award-info":[{"award-number":["JCYJ20220531101412030"]}]},{"name":"Shenzhen Science and Technology Program","award":["JCYJ20240813141807010"],"award-info":[{"award-number":["JCYJ20240813141807010"]}]},{"name":"National Engineering Laboratory for BDSC","award":["SZU-BDSC-IF2024-02"],"award-info":[{"award-number":["SZU-BDSC-IF2024-02"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Multimedia"],"published-print":{"date-parts":[[2026]]},"DOI":"10.1109\/tmm.2026.3651016","type":"journal-article","created":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T18:39:22Z","timestamp":1767638362000},"page":"2363-2377","source":"Crossref","is-referenced-by-count":0,"title":["HitBack: Transformer With Hierarchical-Semantic Cross Attention and Background Contrast for Weakly Supervised Wildlife Semantic Segmentation"],"prefix":"10.1109","volume":"28","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-5197-0715","authenticated-orcid":false,"given":"Puxuan","family":"Xie","sequence":"first","affiliation":[{"name":"Department of Computer Science, Wenzhou-Kean University, Wenzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5780-0364","authenticated-orcid":false,"given":"Wei","family":"Zhuo","sequence":"additional","affiliation":[{"name":"Computer Vision Institute, School of Artificial Intelligence, Shenzhen University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8907-8258","authenticated-orcid":false,"given":"Xinshao","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Engineering Science, University of Oxford, Oxford, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-9843-2083","authenticated-orcid":false,"given":"Songhe","family":"Deng","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Wenzhou-Kean University, Wenzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weizhao","family":"He","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Wenzhou-Kean University, Wenzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1420-0815","authenticated-orcid":false,"given":"Linlin","family":"Shen","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Wenzhou-Kean University, Wenzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Living Planet Report 2020-Bending the Curve of Biodiversity Loss","author":"Almond","year":"2020"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2015.03.156"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.4892"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/sdata.2015.26"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01270-0_28"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1111\/2041-210X.13120"},{"key":"ref7","first-page":"1","article-title":"Florida wildlife camera trap dataset","volume-title":"Proc. CVPR workshop Comput. Vis. Animal Behav.","author":"Gagne","year":"2021"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00427"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2699184"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2644615"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00299"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00699"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1093\/ijl\/3.4.235"},{"key":"ref16","first-page":"1","article-title":"Novel dataset for fine-grained image categorization: Stanford dogs","volume-title":"Proc. CVPR Workshop Fine-Grained Vis. Categorization","author":"Khosla","year":"2011"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-20873-8_1"},{"key":"ref18","article-title":"Animals-10 dataset","author":"Alessio","year":"2023"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00546"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58621-8_12"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01240"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00190"},{"key":"ref23","first-page":"1","article-title":"The iWildCam 2021 competition dataset","volume-title":"Proc. CVPR workshop Fine-Grained Vis. Categorization (FGVC)","author":"Beery","year":"2021"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-023-16673-3"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.381"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00691"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2966647"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01639"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00444"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612148"},{"key":"ref31","first-page":"1686","article-title":"Distilling inter-class distance for semantic segmentation","volume-title":"Proc. Int. Joint Conf. Artif. Intell.","author":"Zhang","year":"2022"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6705"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/tcsvt.2023.3312325"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01229"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3286254"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.2991592"},{"key":"ref37","article-title":"An image is worth 16  16 words: Transformers for image recognition at scale","volume-title":"Proc. ICLR","author":"Dosovitskiy","year":"2021"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3455416"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00302"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.100"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00235"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.07.008"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00106"},{"key":"ref44","article-title":"Grounded SAM: Assembling open-world models for diverse visual tasks","author":"Ren","year":"2024"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"ref46","article-title":"Label studio: Data labeling software","author":"Tkachenko","year":"2020"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00523"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298780"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02131"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01836"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-009-0275-4"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-26293-7_16"},{"key":"ref54","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford","year":"2021"},{"key":"ref55","first-page":"19730","article-title":"Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Li","year":"2023"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00406"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00104"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00425"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00426"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01469"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00108"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01090"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00036"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2024.3359041"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i7.28584"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72946-1_25"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01638"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01634"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73195-2_27"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00276"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref73","first-page":"1","article-title":"DINOv2: Learning robust visual features without supervision","author":"Oquab","year":"2024","journal-title":"Trans. Mach. Learn. Res. J."}],"container-title":["IEEE Transactions on Multimedia"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6046\/11342315\/11329189.pdf?arnumber=11329189","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T19:37:20Z","timestamp":1776109040000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11329189\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":73,"URL":"https:\/\/doi.org\/10.1109\/tmm.2026.3651016","relation":{},"ISSN":["1520-9210","1941-0077"],"issn-type":[{"value":"1520-9210","type":"print"},{"value":"1941-0077","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]}}}