{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T21:53:18Z","timestamp":1776203598195,"version":"3.50.1"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030012335","type":"print"},{"value":"9783030012342","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-01234-2_1","type":"book-chapter","created":{"date-parts":[[2018,10,5]],"date-time":"2018-10-05T16:13:11Z","timestamp":1538755991000},"page":"3-19","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16623,"title":["CBAM: Convolutional Block Attention Module"],"prefix":"10.1007","author":[{"given":"Sanghyun","family":"Woo","sequence":"first","affiliation":[]},{"given":"Jongchan","family":"Park","sequence":"additional","affiliation":[]},{"given":"Joon-Young","family":"Lee","sequence":"additional","affiliation":[]},{"given":"In So","family":"Kweon","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,10,6]]},"reference":[{"key":"1_CR1","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: Proceedings of the Computer Vision and Pattern Recognition (CVPR) (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1_CR2","unstructured":"Krizhevsky, A., Hinton, G.: Learning multiple layers of features from tiny images"},{"key":"1_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014, Part V. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"issue":"11","key":"1_CR4","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun, Y., Bottou, L., Bengio, Y., Haffner, P.: Gradient-based learning applied to document recognition. Proc. IEEE 86(11), 2278\u20132324 (1998)","journal-title":"Proc. IEEE"},{"key":"1_CR5","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1_CR6","doi-asserted-by":"crossref","unstructured":"Zagoruyko, S., Komodakis, N.: Wide residual networks. arXiv preprint arXiv:1605.07146 (2016)","DOI":"10.5244\/C.30.87"},{"key":"1_CR7","doi-asserted-by":"crossref","unstructured":"Xie, S., Girshick, R., Doll\u00e1r, P., Tu, Z., He, K.: Aggregated residual transformations for deep neural networks. arXiv preprint arXiv:1611.05431 (2016)","DOI":"10.1109\/CVPR.2017.634"},{"key":"1_CR8","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Ioffe, S., Vanhoucke, V., Alemi, A.A.: Inception-v4, inception-ResNet and the impact of residual connections on learning. In: Proceedings of the Association for the Advancement of Artificial Intelligence (AAAI) (2017)","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"1_CR9","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"1_CR10","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Liu, W., Jia, Y., Sermanet, P., Reed, S., Anguelov, D., Erhan, D., Vanhoucke, V., Rabinovich, A.: Going deeper with convolutions. In: Proceedings of the Computer Vision and Pattern Recognition (CVPR) (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"1_CR11","doi-asserted-by":"crossref","unstructured":"Chollet, F.: Xception: Deep learning with depthwise separable convolutions. arXiv preprint arXiv:1610.02357 (2016)","DOI":"10.1109\/CVPR.2017.195"},{"key":"1_CR12","unstructured":"Mnih, V., Heess, N., Graves, A., et al.: Recurrent models of visual attention. In: Proceedings of the Neural Information Processing Systems (NIPS). Advances in Neural Information Processing Systems (2014)"},{"key":"1_CR13","unstructured":"Ba, J., Mnih, V., Kavukcuoglu, K.: Multiple object recognition with visual attention (2014)"},{"key":"1_CR14","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate (2014)"},{"key":"1_CR15","unstructured":"Xu, K., Ba, J., Kiros, R., Cho, K., Courville, A., Salakhudinov, R., Zemel, R., Bengio, Y.: Show, attend and tell: neural image caption generation with visual attention (2015)"},{"key":"1_CR16","unstructured":"Gregor, K., Danihelka, I., Graves, A., Rezende, D.J., Wierstra, D.: DRAW: a recurrent neural network for image generation (2015)"},{"key":"1_CR17","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., et al.: Spatial transformer networks. In: Proceedings of the Neural Information Processing Systems (NIPS) (2015)"},{"key":"1_CR18","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D.: Grad-CAM: visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 618\u2013626 (2017)","DOI":"10.1109\/ICCV.2017.74"},{"key":"1_CR19","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. In: Proceedings of the Neural Information Processing Systems (NIPS) (2012)"},{"key":"1_CR20","doi-asserted-by":"crossref","unstructured":"Han, D., Kim, J., Kim, J.: Deep pyramidal residual networks. In: Proceedings of the Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.668"},{"key":"1_CR21","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Weinberger, K.Q., van der Maaten, L.: Densely connected convolutional networks. arXiv preprint arXiv:1608.06993 (2016)","DOI":"10.1109\/CVPR.2017.243"},{"key":"1_CR22","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Vanhoucke, V., Ioffe, S., Shlens, J., Wojna, Z.: Rethinking the inception architecture for computer vision. In: Proceedings of the Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.308"},{"issue":"11","key":"1_CR23","doi-asserted-by":"publisher","first-page":"1254","DOI":"10.1109\/34.730558","volume":"20","author":"L. Itti","year":"1998","unstructured":"Itti, L., Koch, C., Niebur, E.: A model of saliency-based visual attention for rapid scene analysis. In: IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI) (1998)","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"1\u20133","key":"1_CR24","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1080\/135062800394667","volume":"7","author":"RA Rensink","year":"2000","unstructured":"Rensink, R.A.: The dynamic representation of scenes. Vis. Cogn. 7(1\u20133), 17\u201342 (2000)","journal-title":"Vis. Cogn."},{"issue":"3","key":"1_CR25","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1038\/nrn755","volume":"3","author":"M Corbetta","year":"2002","unstructured":"Corbetta, M., Shulman, G.L.: Control of goal-directed and stimulus-driven attention in the brain. Nat. Rev. Neurosci. 3(3), 201\u2013215 (2002)","journal-title":"Nat. Rev. Neurosci."},{"key":"1_CR26","unstructured":"Larochelle, H., Hinton, G.E.: Learning to combine foveal glimpses with a third-order Boltzmann machine. In: Proceedings of the Neural Information Processing Systems (NIPS) (2010)"},{"key":"1_CR27","doi-asserted-by":"crossref","unstructured":"Wang, F., Jiang, M., Qian, C., Yang, S., Li, C., Zhang, H., Wang, X., Tang, X.: Residual attention network for image classification. arXiv preprint arXiv:1704.06904 (2017)","DOI":"10.1109\/CVPR.2017.683"},{"key":"1_CR28","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. arXiv preprint arXiv:1709.01507 (2017)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"1_CR29","doi-asserted-by":"crossref","unstructured":"Chen, L., Zhang, H., Xiao, J., Nie, L., Shao, J., Chua, T.S.: SCA-CNN: spatial and channel-wise attention in convolutional networks for image captioning. In: Proceedings of the Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.667"},{"key":"1_CR30","unstructured":"Sanghyun, W., Soonmin, H., So, K.I.: StairNet: top-down semantic aggregation for accurate one shot detection. In: Proceedings of the Winter Conference on Applications of Computer Vision (WACV) (2018)"},{"key":"1_CR31","unstructured":"Park, J., Woo, S., Lee, J.Y., Kweon, I.S.: BAM: bottleneck attention module. In: Proceedings of the British Machine Vision Conference (BMVC) (2018)"},{"key":"1_CR32","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"818","DOI":"10.1007\/978-3-319-10590-1_53","volume-title":"Computer Vision \u2013 ECCV 2014","author":"MD Zeiler","year":"2014","unstructured":"Zeiler, M.D., Fergus, R.: Visualizing and understanding convolutional networks. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014, Part I. LNCS, vol. 8689, pp. 818\u2013833. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10590-1_53"},{"key":"1_CR33","doi-asserted-by":"crossref","unstructured":"Zhou, B., Khosla, A., Lapedriza, A., Oliva, A., Torralba, A.: Learning deep features for discriminative localization. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2921\u20132929. IEEE (2016)","DOI":"10.1109\/CVPR.2016.319"},{"key":"1_CR34","unstructured":"Zagoruyko, S., Komodakis, N.: Paying more attention to attention: improving the performance of convolutional neural networks via attention transfer. In: ICLR (2017)"},{"key":"1_CR35","unstructured":"Howard, A.G., Zhu, M., Chen, B., Kalenichenko, D., Wang, W., Weyand, T., Andreetto, M., Adam, H.: MobileNets: efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)"},{"key":"1_CR36","unstructured":": PyTorch. http:\/\/pytorch.org\/. Accessed 08 Nov 2017"},{"key":"1_CR37","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"630","DOI":"10.1007\/978-3-319-46493-0_38","volume-title":"Computer Vision \u2013 ECCV 2016","author":"K He","year":"2016","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Identity mappings in deep residual networks. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016, Part IV. LNCS, vol. 9908, pp. 630\u2013645. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_38"},{"key":"1_CR38","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"646","DOI":"10.1007\/978-3-319-46493-0_39","volume-title":"Computer Vision \u2013 ECCV 2016","author":"G Huang","year":"2016","unstructured":"Huang, G., Sun, Y., Liu, Z., Sedra, D., Weinberger, K.Q.: Deep networks with stochastic depth. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016, Part IV. LNCS, vol. 9908, pp. 646\u2013661. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_39"},{"key":"1_CR39","doi-asserted-by":"crossref","unstructured":"Bell, S., Lawrence Zitnick, C., Bala, K., Girshick, R.: Inside-outside net: detecting objects in context with skip pooling and recurrent neural networks. In: Proceedings of the Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.314"},{"key":"1_CR40","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/978-3-319-46448-0_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"W Liu","year":"2016","unstructured":"Liu, W., et al.: SSD: single shot multibox detector. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016, Part I. LNCS, vol. 9905, pp. 21\u201337. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_2"},{"key":"1_CR41","unstructured":"Chen, X., Gupta, A.: An implementation of faster RCNN with study for region sampling. arXiv preprint arXiv:1702.02138 (2017)"},{"key":"1_CR42","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Proceedings of the Neural Information Processing Systems (NIPS) (2015)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-01234-2_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,5]],"date-time":"2022-10-05T00:18:06Z","timestamp":1664929086000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-01234-2_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030012335","9783030012342"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-01234-2_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"6 October 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}