{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T06:15:10Z","timestamp":1751436910967},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2023,7,5]],"date-time":"2023-07-05T00:00:00Z","timestamp":1688515200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,7,5]],"date-time":"2023-07-05T00:00:00Z","timestamp":1688515200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Process Lett"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1007\/s11063-023-11335-9","type":"journal-article","created":{"date-parts":[[2023,7,5]],"date-time":"2023-07-05T20:10:41Z","timestamp":1688587841000},"page":"10483-10499","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["CAM R-CNN: End-to-End Object Detection with Class Activation Maps"],"prefix":"10.1007","volume":"55","author":[{"given":"Shengchuan","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Songlin","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Haixin","family":"Ding","sequence":"additional","affiliation":[]},{"given":"Jie","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Liujuan","family":"Cao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,7,5]]},"reference":[{"key":"11335_CR1","doi-asserted-by":"crossref","unstructured":"Zhang X, Wei Y, Feng J, Yang Y, Huang, T (2018) Adversarial complementary learning for weakly supervised object localization. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1325\u20131334","DOI":"10.1109\/CVPR.2018.00144"},{"key":"11335_CR2","doi-asserted-by":"crossref","unstructured":"Zhou B, Khosla A, Lapedriza A, Oliva A, Torralba A (2016) Learning deep features for discriminative localization. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2921\u20132929","DOI":"10.1109\/CVPR.2016.319"},{"key":"11335_CR3","doi-asserted-by":"crossref","unstructured":"Pang Y, Xie J, Khan MH, Anwer RM, Khan FS, Shao L (2019) Mask-guided attention network for occluded pedestrian detection. In: International conference on computer vision, pp 4967\u20134975","DOI":"10.1109\/ICCV.2019.00507"},{"key":"11335_CR4","doi-asserted-by":"crossref","unstructured":"Zhang S, Yang J, Schiele B (2018) Occluded pedestrian detection through guided attention in cnns. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6995\u20137003","DOI":"10.1109\/CVPR.2018.00731"},{"key":"11335_CR5","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: European conference on computer vision, pp 213\u2013229","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"11335_CR6","doi-asserted-by":"crossref","unstructured":"Sun P, Zhang R, Jiang Y, Kong T, Xu C, Zhan W, Tomizuka M, Li L, Yuan Z, Wang C, Luo P (2021) Sparse r-cnn: end-to-end object detection with learnable proposals. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 14454\u201314463","DOI":"10.1109\/CVPR46437.2021.01422"},{"key":"11335_CR7","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Llion\u00a0Jones ANG, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. In: Advances in neural information processing systems, pp 5998\u20136008"},{"key":"11335_CR8","doi-asserted-by":"crossref","unstructured":"Lin TY, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft COCO: common objects in context. In: European conference on computer vision, pp 740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"11335_CR9","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: towards real-time object detection with region proposal networks. In: Advances in neural information processing systems, pp 91\u201399"},{"key":"11335_CR10","doi-asserted-by":"crossref","unstructured":"Cai Z, Vasconcelos N (2018) Cascade r-cnn: delving into high quality object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6154\u20136162","DOI":"10.1109\/CVPR.2018.00644"},{"key":"11335_CR11","doi-asserted-by":"crossref","unstructured":"Girshick R (2015) Fast r-cnn. In: Proceedings of the IEEE international conference on computer vision, pp 1440\u20131448","DOI":"10.1109\/ICCV.2015.169"},{"key":"11335_CR12","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 580\u2013587","DOI":"10.1109\/CVPR.2014.81"},{"issue":"2","key":"11335_CR13","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/s11263-013-0620-5","volume":"104","author":"JR Uijlings","year":"2013","unstructured":"Uijlings JR, Sande KEVD, Gevers T, Smeulders AW (2013) Selective search for object recognition. Int J Comput Vis 104(2):154\u2013171","journal-title":"Int J Comput Vis"},{"key":"11335_CR14","doi-asserted-by":"crossref","unstructured":"Bodla N, Singh B, Chellappa R, Davis LS (2017) Soft-nms\u2013improving object detection with one line of code. In: Proceedings of the IEEE international conference on computer vision, pp 5561\u20135569","DOI":"10.1109\/ICCV.2017.593"},{"key":"11335_CR15","doi-asserted-by":"crossref","unstructured":"Redmon J, Farhadi A (2017) Yolo9000: better, faster, stronger. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7263\u20137271","DOI":"10.1109\/CVPR.2017.690"},{"key":"11335_CR16","doi-asserted-by":"crossref","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu CY, Berg, AC (2016) SSD: single shot multibox detector. In: European conference on computer vision, pp 21\u201337","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"11335_CR17","doi-asserted-by":"crossref","unstructured":"Lin TY, Goyal P, Girshick R, He K, Doll\u00e1r, P (2017) Focal loss for dense object detection. In: Proceedings of the IEEE international conference on computer vision, pp 2980\u20132988","DOI":"10.1109\/ICCV.2017.324"},{"key":"11335_CR18","doi-asserted-by":"crossref","unstructured":"Law H, Deng J (2018) Cornernet: detecting objects as paired keypoints. In: Proceedings of the European conference on computer vision, pp 734\u2013750","DOI":"10.1007\/978-3-030-01264-9_45"},{"key":"11335_CR19","doi-asserted-by":"crossref","unstructured":"Tian Z, Shen C, Chen H, He T (2019) FCOS: fully convolutional one-stage object detection. In: Proceedings of the IEEE international conference on computer vision, pp 9627\u20139636","DOI":"10.1109\/ICCV.2019.00972"},{"key":"11335_CR20","unstructured":"Zhou X, Wang D, Kr\u00e4henb\u00fchl P (2019) Objects as points. In: arXiv Preprint arXiv:1904.07850"},{"key":"11335_CR21","unstructured":"Zheng M, Gao P, Wang X, Li H, Dong H (2020) End-to-end object detection with adaptive clustering transformer. In: CoRR, Abs\/2011.09315"},{"key":"11335_CR22","unstructured":"Zhu X, Su W, Lu L, Li B, Wang X, Dai J (2020) Deformable detr: deformable transformers for end-to-end object detection. In: CoRR, Abs\/2010.04159"},{"key":"11335_CR23","unstructured":"Sun Z, Cao S, Yang Y, Kitani K (2020) Rethinking transformer-based set prediction for object detection. In: CoRR, Abs\/2011.10881, pp 3611\u20133620"},{"key":"11335_CR24","doi-asserted-by":"crossref","unstructured":"Gao P, Zheng M, Wang X, Dai J, Li H (2021) Fast convergence of detr with spatially modulated co-attention. In: CoRR, Abs\/2101.07448, pp 3621\u20133630","DOI":"10.1109\/ICCV48922.2021.00360"},{"key":"11335_CR25","unstructured":"Hu J, Cao L, Lu Y, Zhang S, Wang Y, Li K, Huang F, Shao L, Ji R (2021) ISTR: end-to-end instance segmentation with transformers. In: arXiv Preprint arXiv:2105.00637"},{"key":"11335_CR26","doi-asserted-by":"crossref","unstructured":"Hong Q, Liu F, Li D, Liu J, Tian L, Shan Y (2022) Dynamic sparse r-cnn. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4723\u20134732","DOI":"10.1109\/CVPR52688.2022.00468"},{"key":"11335_CR27","unstructured":"Chen S, Sun P, Song Y, Luo P (2022) Diffusiondet: Diffusion model for object detection. arXiv, 2211.09788"},{"key":"11335_CR28","doi-asserted-by":"crossref","unstructured":"Bell S, Zitnick CL, Bala K, Girshick R (2016) Inside-outside net: detecting objects in context with skip pooling and recurrent neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2874\u20132883","DOI":"10.1109\/CVPR.2016.314"},{"key":"11335_CR29","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2023.103800","volume":"92","author":"R Li","year":"2023","unstructured":"Li R, Mai Z, Trabelsi C, Zhang Z, Jang J, Sanner S (2023) Transcam: transformer attention-based cam refinement for weakly supervised semantic segmentation. J Vis Commun Image Represent 92:103800","journal-title":"J Vis Commun Image Represent"},{"key":"11335_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.displa.2022.102205","volume":"74","author":"X Zhang","year":"2022","unstructured":"Zhang X, Ma J, Liu H, Hu HM, Yang P (2022) Dual attentional siamese network for visual tracking. Displays 74:102205","journal-title":"Displays"},{"key":"11335_CR31","doi-asserted-by":"crossref","unstructured":"Newell A, Yang K, Deng J (2016) Stacked hourglass networks for human pose estimation. In: Proceedings of European conference on computer vision, pp 483\u2013499","DOI":"10.1007\/978-3-319-46484-8_29"},{"key":"11335_CR32","doi-asserted-by":"crossref","unstructured":"Chen L, Zhang H, Xiao J, Nie L, Shao J, Liu W, Chua TS (2017) SCA-CNN: spatial and channel-wise attention in convolutional networks for image captioning. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6298\u20136306","DOI":"10.1109\/CVPR.2017.667"},{"key":"11335_CR33","doi-asserted-by":"crossref","unstructured":"Wang F, Jiang M, Qian C, Yang S, Li C, Zhang H, Wang X, Tang X (2017) Residual attention network for image classification. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6450\u20136458","DOI":"10.1109\/CVPR.2017.683"},{"key":"11335_CR34","doi-asserted-by":"crossref","unstructured":"Hu J, Shen L, Albanie S, Sun G, Wu E (2020) Squeeze-and-excitation networks. IEEE Transactions on pattern analysis and machine intelligence, 2011\u20132023","DOI":"10.1109\/TPAMI.2019.2913372"},{"key":"11335_CR35","unstructured":"Park J, Woo S, Lee JY, Kweon IS (2018) BAM: bottleneck attention module. In: Proceedings of the British machine vision conference, pp 1\u201314"},{"key":"11335_CR36","doi-asserted-by":"crossref","unstructured":"Woo Park J, Lee JY, Kweon IS (2018) CBAM: convolutional block attention module. In: Proceedings of European conference on computer vision, pp 3\u201319","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"11335_CR37","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"11335_CR38","doi-asserted-by":"crossref","unstructured":"Lin TY, Doll\u00e1r P, Ross\u00a0Girshick KH, Hariharan B, Belongie S (2017) Feature pyramid networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2117\u20132125","DOI":"10.1109\/CVPR.2017.106"},{"key":"11335_CR39","unstructured":"Wu Y, Kirillov A, Massa F, Lo WY, Girshick R (2019) Detectron2. In: https:\/\/github.com\/facebookresearch\/detectron2"},{"key":"11335_CR40","doi-asserted-by":"crossref","unstructured":"Lee H, Kim HE, Nam H (2019) SRM: A style-based recalibration module for convolutional neural networks. In: Proceedings of IEEE\/CVF international conference on computer vision, pp 1854\u20131862","DOI":"10.1109\/ICCV.2019.00194"}],"container-title":["Neural Processing Letters"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-023-11335-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11063-023-11335-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-023-11335-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,22]],"date-time":"2023-11-22T05:06:24Z","timestamp":1700629584000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11063-023-11335-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,5]]},"references-count":40,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["11335"],"URL":"https:\/\/doi.org\/10.1007\/s11063-023-11335-9","relation":{},"ISSN":["1370-4621","1573-773X"],"issn-type":[{"value":"1370-4621","type":"print"},{"value":"1573-773X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,7,5]]},"assertion":[{"value":"13 June 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 July 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}