{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T10:14:20Z","timestamp":1740132860536,"version":"3.37.3"},"reference-count":47,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61976233"],"award-info":[{"award-number":["61976233"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Guangdong Province Basic and Applied Basic Research"},{"name":"Regional Joint Fund-Key","award":["2019B1515120039"],"award-info":[{"award-number":["2019B1515120039"]}]},{"name":"Guangdong Outstanding Youth Fund","award":["2021B1515020061"],"award-info":[{"award-number":["2021B1515020061"]}]},{"DOI":"10.13039\/501100017607","name":"Shenzhen Fundamental Research Program","doi-asserted-by":"publisher","award":["RCYX20200714114642083","JCYJ20190807154211365"],"award-info":[{"award-number":["RCYX20200714114642083","JCYJ20190807154211365"]}],"id":[{"id":"10.13039\/501100017607","id-type":"DOI","asserted-by":"publisher"}]},{"name":"CAAI-Huawei MindSpore Open Fund"},{"name":"MindSpore"},{"name":"Guangdong Provincial Key Laboratory of Fire Science and Intelligent Emergency Technology"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Multimedia"],"published-print":{"date-parts":[[2023]]},"DOI":"10.1109\/tmm.2022.3222653","type":"journal-article","created":{"date-parts":[[2022,11,16]],"date-time":"2022-11-16T20:39:39Z","timestamp":1668631179000},"page":"1916-1927","source":"Crossref","is-referenced-by-count":2,"title":["Caption-Aided Product Detection via Collaborative Pseudo-Label Harmonization"],"prefix":"10.1109","volume":"25","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9519-612X","authenticated-orcid":false,"given":"Xiao","family":"Dong","sequence":"first","affiliation":[{"name":"Sun Yat-Sen University, Zhuhai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1823-502X","authenticated-orcid":false,"given":"Gengwei","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Technology Sydney, Sydney, Australia"}]},{"given":"Xunlin","family":"Zhan","sequence":"additional","affiliation":[{"name":"Shenzhen Campus, Sun Yat-sen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6774-4536","authenticated-orcid":false,"given":"Yi","family":"Ding","sequence":"additional","affiliation":[{"name":"Shenzhen Campus, Sun Yat-sen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2812-8781","authenticated-orcid":false,"given":"Yunchao","family":"Wei","sequence":"additional","affiliation":[{"name":"Beijing Jiaotong University, Beijing, China"}]},{"given":"Minlong","family":"Lu","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3213-3062","authenticated-orcid":false,"given":"Xiaodan","family":"Liang","sequence":"additional","affiliation":[{"name":"Shenzhen Campus, Sun Yat-sen University, Shenzhen, China"}]}],"member":"263","reference":[{"key":"ref13","first-page":"431","article-title":"Weakly supervised object localization with latent category learning","author":"wang","year":"0","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01061"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.309"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/2872427.2883037"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.311"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2017.05.001"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"ref31","article-title":"Learning object detection from captions via textual scene attributes","volume":"abs 2009 14558","author":"jerbi","year":"2020","journal-title":"CoRR"},{"key":"ref30","article-title":"Contrastive visual-linguistic pretraining","volume":"abs 2007 13135","author":"shi","year":"2020","journal-title":"CoRR"},{"key":"ref11","first-page":"570","article-title":"A framework for multiple-instance learning","author":"maron","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref33","article-title":"RP2K: A large-scale retail product dataset forfine-grained image classification","volume":"abs 2006 12634","author":"peng","year":"2020","journal-title":"CoRR"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00978"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-022-3513-y"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313644"},{"key":"ref1","first-page":"525","article-title":"Large-scale multi-class and hierarchical product categorization for an e-commerce giant","author":"cevahir","year":"0","journal-title":"Proc 26th Int Conf Comput Linguistics Tech Papers"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3044997"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/1015706.1015720"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298668"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00838"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.2991592"},{"key":"ref24","first-page":"350","article-title":"Contextlocnet: Context-aware deep network models for weakly supervised localization","author":"kantorov","year":"0","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-013-0620-5"},{"key":"ref23","first-page":"434","article-title":"TS2C: Tight box mining with surrounding segmentation context for weakly supervised object detection","author":"wei","year":"0","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.89"},{"key":"ref26","first-page":"7005","article-title":"Uwsod: Toward fully-supervised-level capacity weakly supervised object detection","volume":"33","author":"shen","year":"2020","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2876304"},{"key":"ref47","first-page":"391","article-title":"Edge boxes: Locating object proposals from edges","author":"zitnick","year":"0","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.326"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00312"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00077"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00846"},{"key":"ref44","article-title":"Contrastive learning with hard negative samples","author":"robinson","year":"0","journal-title":"Proc Int Conf Learn Representations (ICLR)"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.457"},{"article-title":"Detectron2","year":"2019","author":"wu","key":"ref43"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1017\/ATSIP.2020.10"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00141"},{"key":"ref29","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"0","journal-title":"Proc North Amer Chapter Assoc Comput Linguistics Human Lang Technol (NAACL-HLT)"},{"key":"ref8","first-page":"13","article-title":"Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks","author":"lu","year":"0","journal-title":"Proc 33rd Int Conf Neural Inf Process Syst"},{"key":"ref7","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"radford","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref9","article-title":"VL-BERT: Pre-training of generic visual-linguistic representations","author":"su","year":"0","journal-title":"Proc Int Conf Learn Representations (ICLR)"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358170"},{"key":"ref6","first-page":"4904","article-title":"Scaling up visual and vision-language representation learning with noisy text supervision","author":"jia","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref5","first-page":"740","article-title":"Microsoft coco: Common objects in context","author":"lin","year":"0","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.146"}],"container-title":["IEEE Transactions on Multimedia"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6046\/10016790\/09953558.pdf?arnumber=9953558","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,26]],"date-time":"2023-06-26T18:46:42Z","timestamp":1687805202000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9953558\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"references-count":47,"URL":"https:\/\/doi.org\/10.1109\/tmm.2022.3222653","relation":{},"ISSN":["1520-9210","1941-0077"],"issn-type":[{"type":"print","value":"1520-9210"},{"type":"electronic","value":"1941-0077"}],"subject":[],"published":{"date-parts":[[2023]]}}}