{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T16:26:40Z","timestamp":1778257600513,"version":"3.51.4"},"publisher-location":"Cham","reference-count":47,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031198175","type":"print"},{"value":"9783031198182","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-19818-2_24","type":"book-chapter","created":{"date-parts":[[2022,10,21]],"date-time":"2022-10-21T16:21:10Z","timestamp":1666369270000},"page":"413-429","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":24,"title":["Multi-scale and\u00a0Cross-scale Contrastive Learning for\u00a0Semantic Segmentation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9500-7085","authenticated-orcid":false,"given":"Theodoros","family":"Pissas","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6453-5376","authenticated-orcid":false,"given":"Claudio S.","family":"Ravasio","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7695-6354","authenticated-orcid":false,"given":"Lyndon Da","family":"Cruz","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9152-3194","authenticated-orcid":false,"given":"Christos","family":"Bergeles","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,22]]},"reference":[{"key":"24_CR1","doi-asserted-by":"crossref","unstructured":"Alonso, I., Sabater, A., Ferstl, D., Montesano, L., Murillo, A.C.: Semi-supervised semantic segmentation with pixel-level contrastive learning from a class-wise memory bank. In: Proceedings of the IEEE International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00811"},{"key":"24_CR2","first-page":"1","volume":"32","author":"P Bachman","year":"2019","unstructured":"Bachman, P., Hjelm, R.D., Buchwalter, W.: Learning representations by maximizing mutual information across views. Adv. Neural. Inf. Process. Syst. 32, 1\u201311 (2019)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"24_CR3","doi-asserted-by":"crossref","unstructured":"Berman, M., Triki, A.R., Blaschko, M.B.: The lov\u00e1sz-softmax loss: a tractable surrogate for the optimization of the intersection-over-union measure in neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2018","DOI":"10.1109\/CVPR.2018.00464"},{"key":"24_CR4","first-page":"1","volume":"33","author":"K Chaitanya","year":"2020","unstructured":"Chaitanya, K., Erdil, E., Karani, N., Konukoglu, E.: Contrastive learning of global and local features for medical image segmentation with limited annotations. Adv. Neural. Inf. Process. Syst. 33, 1\u201313 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"24_CR5","unstructured":"Chen, L.C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: Semantic image segmentation with deep convolutional nets and fully connected CRFs (2016)"},{"key":"24_CR6","unstructured":"Chen, L.C., Papandreou, G., Schroff, F., Adam, H.: Rethinking Atrous convolution for semantic image segmentation (2017)"},{"key":"24_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"833","DOI":"10.1007\/978-3-030-01234-2_49","volume-title":"Computer Vision \u2013 ECCV 2018","author":"L-C Chen","year":"2018","unstructured":"Chen, L.-C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-decoder with Atrous separable convolution for semantic image segmentation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11211, pp. 833\u2013851. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_49"},{"key":"24_CR8","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning, pp. 1597\u20131607. PMLR (2020)"},{"key":"24_CR9","unstructured":"Cordts, M., et al.: The cityscapes dataset for semantic urban scene understanding. CoRR abs\/1604.01685 (2016). https:\/\/arxiv.org\/abs\/1604.01685"},{"key":"24_CR10","doi-asserted-by":"crossref","unstructured":"Grammatikopoulou, M., et al.: CaDIS: cataract dataset for image segmentation (2020)","DOI":"10.1016\/j.media.2021.102053"},{"key":"24_CR11","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. arXiv preprint arXiv:1911.05722 (2019)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"24_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"24_CR13","doi-asserted-by":"crossref","unstructured":"H\u00e9naff, O.J., Koppula, S., Alayrac, J.B., Oord, A., Vinyals, O., Carreira, J.: Efficient visual pretraining with contrastive detection. In: International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00993"},{"key":"24_CR14","unstructured":"Hjelm, R.D., et al.: Learning deep representations by mutual information estimation and maximization. In: ICLR (2019)"},{"key":"24_CR15","doi-asserted-by":"crossref","unstructured":"Hu, H., Cui, J., Wang, L.: Region-aware contrastive learning for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 16291\u201316301, October 2021","DOI":"10.1109\/ICCV48922.2021.01598"},{"key":"24_CR16","doi-asserted-by":"crossref","unstructured":"Hwang, J.J., et al.: SegSrt: Segmentation by discriminative sorting of segments. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 7334\u20137344 (2019)","DOI":"10.1109\/ICCV.2019.00743"},{"key":"24_CR17","unstructured":"Kalantidis, Y., Sariyildiz, M.B., Pion, N., Weinzaepfel, P., Larlus, D.: Hard negative mixing for contrastive learning. CoRR abs\/2010.01028 (2020). https:\/\/arxiv.org\/abs\/2010.01028"},{"key":"24_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"605","DOI":"10.1007\/978-3-030-01246-5_36","volume-title":"Computer Vision \u2013 ECCV 2018","author":"T-W Ke","year":"2018","unstructured":"Ke, T.-W., Hwang, J.-J., Liu, Z., Yu, S.X.: Adaptive affinity fields for semantic segmentation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11205, pp. 605\u2013621. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01246-5_36"},{"key":"24_CR19","unstructured":"Khosla, P., et al.: Supervised contrastive learning. arXiv preprint arXiv:2004.11362 (2020)"},{"key":"24_CR20","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 10012\u201310022, October 2021","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"24_CR21","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: ICLR (2019)"},{"issue":"11","key":"24_CR22","first-page":"1","volume":"9","author":"L Van der Maaten","year":"2008","unstructured":"Van der Maaten, L., Hinton, G.: Visualizing data using T-SNE. J. Mach. Learn. Res. 9(11), 1\u201327 (2008)","journal-title":"J. Mach. Learn. Res."},{"key":"24_CR23","doi-asserted-by":"crossref","unstructured":"Mottaghi, R., et al.: The role of context for object detection and semantic segmentation in the wild. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2014)","DOI":"10.1109\/CVPR.2014.119"},{"key":"24_CR24","unstructured":"van den Oord, A., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. CoRR abs\/1807.03748 (2018). https:\/\/arxiv.org\/abs\/1807.03748"},{"key":"24_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"509","DOI":"10.1007\/978-3-030-87202-1_49","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2021","author":"T Pissas","year":"2021","unstructured":"Pissas, T., Ravasio, C.S., Da Cruz, L., Bergeles, C.: Effective semantic segmentation in cataract surgery: what matters most? In: de Bruijne, M., et al. (eds.) MICCAI 2021. LNCS, vol. 12904, pp. 509\u2013518. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-87202-1_49"},{"key":"24_CR26","doi-asserted-by":"crossref","unstructured":"Ranftl, R., Bochkovskiy, A., Koltun, V.: Vision transformers for dense prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 12179\u201312188, October 2021","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"24_CR27","unstructured":"Saunshi, N., Plevrakis, O., Arora, S., Khodak, M., Khandeparkar, H.: A theoretical analysis of contrastive unsupervised representation learning. In: Chaudhuri, K., Salakhutdinov, R. (eds.) Proceedings of the 36th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 97, pp. 5628\u20135637. PMLR, 09\u201315 June 2019. https:\/\/proceedings.mlr.press\/v97\/saunshi19a.html"},{"issue":"4","key":"24_CR28","doi-asserted-by":"publisher","first-page":"640","DOI":"10.1109\/TPAMI.2016.2572683","volume":"39","author":"E Shelhamer","year":"2017","unstructured":"Shelhamer, E., Long, J., Darrell, T.: Fully convolutional networks for semantic segmentation. IEEE Trans. Pattern Anal. Mach. Intell. 39(4), 640\u2013651 (2017). https:\/\/doi.org\/10.1109\/TPAMI.2016.2572683","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"24_CR29","doi-asserted-by":"crossref","unstructured":"Shrivastava, A., Gupta, A., Girshick, R.: Training region-based object detectors with online hard example mining. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2016","DOI":"10.1109\/CVPR.2016.89"},{"key":"24_CR30","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. In: Proceedings of the International Conference on Learning Representations (2015)"},{"key":"24_CR31","doi-asserted-by":"crossref","unstructured":"Strudel, R., Garcia, R., Laptev, I., Schmid, C.: Segmenter: transformer for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 7262\u20137272, October 2021","DOI":"10.1109\/ICCV48922.2021.00717"},{"key":"24_CR32","doi-asserted-by":"crossref","unstructured":"Takikawa, T., Acuna, D., Jampani, V., Fidler, S.: Gated-SCNN: gated shape CNNs for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), October 2019","DOI":"10.1109\/ICCV.2019.00533"},{"key":"24_CR33","doi-asserted-by":"publisher","first-page":"3349","DOI":"10.1109\/TPAMI.2020.2983686","volume":"43","author":"J Wang","year":"2019","unstructured":"Wang, J., et al.: Deep high-resolution representation learning for visual recognition. TPAMI. 43, 3349\u20133364 (2019)","journal-title":"TPAMI."},{"key":"24_CR34","unstructured":"Wang, T., Isola, P.: Understanding contrastive representation learning through alignment and uniformity on the hypersphere. In: III, H.D., Singh, A. (eds.) Proceedings of the 37th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 119, pp. 9929\u20139939. PMLR, 13\u201318 July 2020. https:\/\/proceedings.mlr.press\/v119\/wang20k.html"},{"key":"24_CR35","doi-asserted-by":"crossref","unstructured":"Wang, W., Zhou, T., Yu, F., Dai, J., Konukoglu, E., Van Gool, L.: Exploring cross-image pixel contrast for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 7303\u20137313, October 2021","DOI":"10.1109\/ICCV48922.2021.00721"},{"key":"24_CR36","doi-asserted-by":"crossref","unstructured":"Wang, X., Zhang, R., Shen, C., Kong, T., Li, L.: Dense contrastive learning for self-supervised visual pre-training. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2021)","DOI":"10.1109\/CVPR46437.2021.00304"},{"key":"24_CR37","doi-asserted-by":"crossref","unstructured":"Wu, Z., Xiong, Y., Yu, S.X., Lin, D.: Unsupervised feature learning via non-parametric instance discrimination. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2018","DOI":"10.1109\/CVPR.2018.00393"},{"key":"24_CR38","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"432","DOI":"10.1007\/978-3-030-01228-1_26","volume-title":"Computer Vision \u2013 ECCV 2018","author":"T Xiao","year":"2018","unstructured":"Xiao, T., Liu, Y., Zhou, B., Jiang, Y., Sun, J.: Unified perceptual parsing for scene understanding. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11209, pp. 432\u2013448. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01228-1_26"},{"key":"24_CR39","unstructured":"Xiao, T., Wang, X., Efros, A.A., Darrell, T.: What should not be contrastive in contrastive learning. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=CZ8Y3NzuVzO"},{"key":"24_CR40","doi-asserted-by":"crossref","unstructured":"Xie, E., et al.: DetCO: unsupervised contrastive learning for object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 8392\u20138401, October 2021","DOI":"10.1109\/ICCV48922.2021.00828"},{"key":"24_CR41","doi-asserted-by":"crossref","unstructured":"Yu, F., Koltun, V., Funkhouser, T.: Dilated residual networks. In: Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.75"},{"key":"24_CR42","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1007\/978-3-030-58539-6_11","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Y Yuan","year":"2020","unstructured":"Yuan, Y., Chen, X., Wang, J.: Object-contextual representations for semantic segmentation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12351, pp. 173\u2013190. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58539-6_11"},{"key":"24_CR43","unstructured":"Zhang, H., et al.: ResNeST: Split-attention networks. arXiv preprint arXiv:2004.08955 (2020)"},{"key":"24_CR44","doi-asserted-by":"crossref","unstructured":"Zhang, Y., He, R., Liu, Z., Lim, K.H., Bing, L.: An unsupervised sentence embedding method by mutual information maximization. In: EMNLP (2021)","DOI":"10.18653\/v1\/2020.emnlp-main.124"},{"key":"24_CR45","unstructured":"Zhao, S., Wang, Y., Yang, Z., Cai, D.: Region mutual information loss for semantic segmentation. In: Wallach, H., Larochelle, H., Beygelzimer, A., d\u2019Alch\u00e9-Buc, F., Fox, E., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 32. Curran Associates, Inc. (2019). https:\/\/proceedings.neurips.cc\/paper\/2019\/file\/a67c8c9a961b4182688768dd9ba015fe-Paper.pdf"},{"key":"24_CR46","doi-asserted-by":"crossref","unstructured":"Zhao, X., et al.: Contrastive learning for label efficient semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 10623\u201310633, October 2021","DOI":"10.1109\/ICCV48922.2021.01045"},{"key":"24_CR47","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1007\/s11263-018-1140-0","volume":"127","author":"B Zhou","year":"2018","unstructured":"Zhou, B., Zhao, H., Puig, X., Fidler, S., Barriuso, A., Torralba, A.: Semantic understanding of scenes through the ade20k dataset. Int. J. Comput. Vision 127, 302\u2013321 (2018)","journal-title":"Int. J. Comput. Vision"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-19818-2_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T14:30:03Z","timestamp":1710340203000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-19818-2_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031198175","9783031198182"],"references-count":47,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-19818-2_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"22 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}