{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T17:46:38Z","timestamp":1764783998229,"version":"3.37.3"},"reference-count":56,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2017,9,1]],"date-time":"2017-09-01T00:00:00Z","timestamp":1504224000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61525206","61572472"],"award-info":[{"award-number":["61525206","61572472"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"crossref","award":["2016YFB0800403"],"award-info":[{"award-number":["2016YFB0800403"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100004826","name":"Beijing Natural Science Foundation","doi-asserted-by":"publisher","award":["4152050"],"award-info":[{"award-number":["4152050"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100014380","name":"Beijing Advanced Innovation Center for Imaging Technology","doi-asserted-by":"crossref","award":["BAICIT-2016009"],"award-info":[{"award-number":["BAICIT-2016009"]}],"id":[{"id":"10.13039\/100014380","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Multimedia"],"published-print":{"date-parts":[[2017,9]]},"DOI":"10.1109\/tmm.2017.2729786","type":"journal-article","created":{"date-parts":[[2017,7,20]],"date-time":"2017-07-20T18:09:50Z","timestamp":1500574190000},"page":"2105-2116","source":"Crossref","is-referenced-by-count":19,"title":["Object Localization Based on Proposal Fusion"],"prefix":"10.1109","volume":"19","author":[{"given":"Sheng","family":"Tang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lixi","family":"Deng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yongdong","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2015.2415497"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2014.09.100"},{"key":"ref33","first-page":"379","article-title":"R-fcn: Object detection via region- based fully convolutional networks","author":"dai","year":"2016"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.98"},{"key":"ref31","article-title":"A multipath network for object detection","author":"zagoruyko","year":"2016","journal-title":"Proc Brit Mach Vis Conf"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.314"},{"key":"ref37","article-title":"TRECVID 2008 High-Level Feature Extraction By MCG-ICT-CAS","author":"tang","year":"0","journal-title":"Proc TRECVID 2008 Workshop"},{"key":"ref36","first-page":"740","article-title":"Microsoft COCO: Common objects in context","author":"lin","year":"2014","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-009-0275-4"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.89"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.135"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.414"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2601099"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref1","first-page":"630","article-title":"Identity mappings in deep residual networks","author":"he","year":"0","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2006.68"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.177"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2010.2041828"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2009.167"},{"key":"ref26","first-page":"391","article-title":"Edge boxes: Locating object proposals from edges","author":"zitnick","year":"0","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-013-0620-5"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.49"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2537320"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.305"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/2.781637"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.276"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.28"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5540226"},{"key":"ref10","first-page":"346","article-title":"Spatial pyramid pooling in deep convolutional networks for visual recognition","author":"he","year":"0","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2010.2041101"},{"key":"ref12","first-page":"91","article-title":"Faster R-CNN: Towards real-time object detection with region proposal networks","author":"ren","year":"2015","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2011.2168198"},{"key":"ref14","first-page":"4133","article-title":"Image caption with global-local attention","author":"li","year":"2017","journal-title":"Proc of the Thirty-First AAAI Conf Artificial Intelligence Feb 4-9 2017"},{"key":"ref15","article-title":"Global-residual and localboundary refinement networks for rectifying scene parsing predictions","author":"zhang","year":"2017","journal-title":"The 26th Int Joint Conf Artificial Intelligence (IJCAI-2017) Aug 19-25 2017"},{"key":"ref16","article-title":"Overfeat: Integrated recognition, localization and detection using convolutional networks","author":"sermanet","year":"2013","journal-title":"CoRR"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2009.5459257"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1023\/A:1008162616689"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref3","first-page":"448","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"ioffe","year":"2015","journal-title":"Proceedings of the 32nd Intl Conf on Machine Learning"},{"key":"ref6","article-title":"Network in network","author":"lin","year":"2013","journal-title":"CoRR"},{"key":"ref5","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"0","journal-title":"CoRR"},{"key":"ref8","first-page":"248","article-title":"Imagenet: A large-scale hierarchical image database","author":"deng","year":"0","journal-title":"Proc IEEE Comput Soc Conf Comput Vis Pattern Recog"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5540063"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2437384"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-007-0060-1"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.310"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126456"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2465908"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-04114-8_18"},{"key":"ref41","doi-asserted-by":"crossref","first-page":"1047","DOI":"10.1109\/TIP.2014.2298982","article-title":"Learning and recognition of on-premise signs from weakly labeled street view images","volume":"23","author":"tsai","year":"0","journal-title":"IEEE Trans Image Process"},{"key":"ref44","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1023\/A:1018628609742","article-title":"Least squares support vector machine classifiers","volume":"9","author":"suykens","year":"0","journal-title":"Neural Process Lett"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2015.12.006"}],"container-title":["IEEE Transactions on Multimedia"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6046\/8010475\/07987087.pdf?arnumber=7987087","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T16:03:48Z","timestamp":1642003428000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7987087\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,9]]},"references-count":56,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/tmm.2017.2729786","relation":{},"ISSN":["1520-9210","1941-0077"],"issn-type":[{"type":"print","value":"1520-9210"},{"type":"electronic","value":"1941-0077"}],"subject":[],"published":{"date-parts":[[2017,9]]}}}