{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,18]],"date-time":"2026-02-18T23:47:22Z","timestamp":1771458442654,"version":"3.50.1"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030208691","type":"print"},{"value":"9783030208707","type":"electronic"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-20870-7_27","type":"book-chapter","created":{"date-parts":[[2019,5,24]],"date-time":"2019-05-24T16:14:21Z","timestamp":1558714461000},"page":"435-450","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":22,"title":["Semantic Aware Attention Based Deep Object Co-segmentation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9581-0775","authenticated-orcid":false,"given":"Hong","family":"Chen","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8067-6227","authenticated-orcid":false,"given":"Yifei","family":"Huang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8726-2780","authenticated-orcid":false,"given":"Hideki","family":"Nakayama","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,5,25]]},"reference":[{"key":"27_CR1","doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Bottom-up and top-down attention for image captioning and visual question answering. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00636"},{"issue":"12","key":"27_CR2","doi-asserted-by":"publisher","first-page":"2481","DOI":"10.1109\/TPAMI.2016.2644615","volume":"39","author":"V Badrinarayanan","year":"2017","unstructured":"Badrinarayanan, V., Kendall, A., Cipolla, R.: SegNet: a deep convolutional encoder-decoder architecture for image segmentation. IEEE Trans. Pattern Anal. Mach. Intell. 39(12), 2481\u20132495 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"27_CR3","doi-asserted-by":"crossref","unstructured":"Batra, D., Kowdle, A., Parikh, D., Luo, J., Chen, T.: iCoseg: interactive co-segmentation with intelligent scribble guidance. In: CVPR (2010)","DOI":"10.1109\/CVPR.2010.5540080"},{"issue":"4","key":"27_CR4","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"LC Chen","year":"2018","unstructured":"Chen, L.C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: DeepLab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs. IEEE Trans. Pattern Anal. Mach. Intell. 40(4), 834\u2013848 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"27_CR5","doi-asserted-by":"crossref","unstructured":"Chen, L., et al.: SCA-CNN: spatial and channel-wise attention in convolutional networks for image captioning. arXiv preprint arXiv:1611.05594 (2016)","DOI":"10.1109\/CVPR.2017.667"},{"key":"27_CR6","doi-asserted-by":"crossref","unstructured":"Chen, X., Shrivastava, A., Gupta, A.: Enriching visual knowledge bases via object discovery and segmentation. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.261"},{"key":"27_CR7","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"27_CR8","doi-asserted-by":"crossref","unstructured":"Dosovitskiy, A., et al.: FlowNet: learning optical flow with convolutional networks. In: CVPR (2015)","DOI":"10.1109\/ICCV.2015.316"},{"key":"27_CR9","doi-asserted-by":"crossref","unstructured":"Faktor, A., Irani, M.: Co-segmentation by composition. In: ICCV (2013)","DOI":"10.1109\/ICCV.2013.164"},{"key":"27_CR10","doi-asserted-by":"crossref","unstructured":"Gan, C., Li, Y., Li, H., Sun, C., Gong, B.: VQS: linking segmentations to questions and answers for supervised attention in VQA and question-focused semantic segmentation. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.201"},{"key":"27_CR11","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. arXiv preprint arXiv:1709.01507 (2017)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"27_CR12","doi-asserted-by":"crossref","unstructured":"Huang, Y., Cai, M., Kera, H., Yonetani, R., Higuchi, K., Sato, Y.: Temporal localization and spatial segmentation of joint attention in multiple first-person videos. In: ICCVW (2017)","DOI":"10.1109\/ICCVW.2017.273"},{"key":"27_CR13","doi-asserted-by":"crossref","unstructured":"Huang, Y., Cai, M., Li, Z., Sato, Y.: Predicting gaze in egocentric video by learning task-dependent attention transition. arXiv preprint arXiv:1803.09125 (2018)","DOI":"10.1007\/978-3-030-01225-0_46"},{"key":"27_CR14","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. arXiv preprint arXiv:1502.03167 (2015)"},{"key":"27_CR15","unstructured":"Jain, S.D., Xiong, B., Grauman, K.: Pixel objectness. arXiv preprint arXiv:1701.05349 (2017)"},{"key":"27_CR16","doi-asserted-by":"crossref","unstructured":"Jerripothula, K.R., Cai, J., Meng, F., Yuan, J.: Automatic image co-segmentation using geometric mean saliency. In: ICIP (2014)","DOI":"10.1109\/ICIP.2014.7025663"},{"issue":"9","key":"27_CR17","doi-asserted-by":"publisher","first-page":"1896","DOI":"10.1109\/TMM.2016.2576283","volume":"18","author":"KR Jerripothula","year":"2016","unstructured":"Jerripothula, K.R., Cai, J., Yuan, J.: Image co-segmentation via saliency co-fusion. IEEE Trans. Multimedia 18(9), 1896\u20131909 (2016)","journal-title":"IEEE Trans. Multimedia"},{"key":"27_CR18","doi-asserted-by":"crossref","unstructured":"Joulin, A., Bach, F., Ponce, J.: Discriminative clustering for image co-segmentation. In: CVPR (2010)","DOI":"10.1109\/CVPR.2010.5539868"},{"key":"27_CR19","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"27_CR20","unstructured":"Li, W., Jafari, O.H., Rother, C.: Deep object co-segmentation. arXiv preprint arXiv:1804.06423 (2018)"},{"key":"27_CR21","doi-asserted-by":"crossref","unstructured":"Li, Z., Tao, R., Gavves, E., Snoek, C.G., Smeulders, A., et al.: Tracking by natural language specification. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.777"},{"key":"27_CR22","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"27_CR23","unstructured":"Mukherjee, P., Lall, B., Lattupally, S.: Object cosegmentation using deep Siamese network. arXiv preprint arXiv:1803.02555 (2018)"},{"key":"27_CR24","unstructured":"Paszke, A., et al.: Automatic differentiation in PyTorch (2017)"},{"key":"27_CR25","doi-asserted-by":"crossref","unstructured":"Quan, R., Han, J., Zhang, D., Nie, F.: Object co-segmentation via graph optimized-flexible manifold ranking. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.81"},{"key":"27_CR26","doi-asserted-by":"crossref","unstructured":"Rubinstein, M., Joulin, A., Kopf, J., Liu, C.: Unsupervised joint object discovery and segmentation in internet images. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.253"},{"key":"27_CR27","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D.: Grad-CAM: visual explanations from deep networks via gradient-based localization. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.74"},{"key":"27_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/11744023_1","volume-title":"Computer Vision \u2013 ECCV 2006","author":"J Shotton","year":"2006","unstructured":"Shotton, J., Winn, J., Rother, C., Criminisi, A.: TextonBoost: joint appearance, shape and context modeling for multi-class object recognition and segmentation. In: Leonardis, A., Bischof, H., Pinz, A. (eds.) ECCV 2006. LNCS, vol. 3951, pp. 1\u201315. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11744023_1"},{"key":"27_CR29","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"465","DOI":"10.1007\/978-3-642-15552-9_34","volume-title":"Computer Vision \u2013 ECCV 2010","author":"S Vicente","year":"2010","unstructured":"Vicente, S., Kolmogorov, V., Rother, C.: Cosegmentation revisited: models and optimization. In: Daniilidis, K., Maragos, P., Paragios, N. (eds.) ECCV 2010. LNCS, vol. 6312, pp. 465\u2013479. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-15552-9_34"},{"key":"27_CR30","doi-asserted-by":"crossref","unstructured":"Vicente, S., Rother, C., Kolmogorov, V.: Object cosegmentation. In: CVPR (2011)","DOI":"10.1109\/CVPR.2011.5995530"},{"key":"27_CR31","unstructured":"Xu, K., et al.: Show, attend and tell: neural image caption generation with visual attention. In: ICML (2015)"},{"key":"27_CR32","doi-asserted-by":"crossref","unstructured":"Yang, C., Kim, T., Wang, R., Peng, H., Kuo, C.C.J.: Show, attend and translate: unsupervised image translation with self-regularization and attention. arXiv preprint arXiv:1806.06195 (2018)","DOI":"10.1109\/TIP.2019.2914583"},{"key":"27_CR33","doi-asserted-by":"crossref","unstructured":"Yang, Z., He, X., Gao, J., Deng, L., Smola, A.: Stacked attention networks for image question answering. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.10"},{"key":"27_CR34","unstructured":"Yosinski, J., Clune, J., Nguyen, A., Fuchs, T., Lipson, H.: Understanding neural networks through deep visualization. arXiv preprint arXiv:1506.06579 (2015)"},{"key":"27_CR35","doi-asserted-by":"crossref","unstructured":"Yu, D., Fu, J., Mei, T., Rui, Y.: Multi-level attention networks for visual question answering. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.446"},{"key":"27_CR36","unstructured":"Yu, F., Koltun, V.: Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122 (2015)"},{"key":"27_CR37","doi-asserted-by":"crossref","unstructured":"Yu, Y., Choi, J., Kim, Y., Yoo, K., Lee, S.H., Kim, G.: Supervising neural attention models for video captioning by human gaze data. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.648"},{"key":"27_CR38","doi-asserted-by":"crossref","unstructured":"Yuan, Z., Lu, T., Wu, Y.: Deep-dense conditional random fields for object co-segmentation. In: IJCAI (2017)","DOI":"10.24963\/ijcai.2017\/471"},{"key":"27_CR39","unstructured":"Zhang, H., Goodfellow, I., Metaxas, D., Odena, A.: Self-attention generative adversarial networks. arXiv preprint arXiv:1805.08318 (2018)"},{"key":"27_CR40","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., Jia, J.: Pyramid scene parsing network. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.660"},{"key":"27_CR41","doi-asserted-by":"crossref","unstructured":"Zhu, C., Zhao, Y., Huang, S., Tu, K., Ma, Y.: Structured attentions for visual question answering. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.145"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-20870-7_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,18]],"date-time":"2022-09-18T16:20:24Z","timestamp":1663518024000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-20870-7_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030208691","9783030208707"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-20870-7_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"25 May 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Perth, WA","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 December 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/accv2018.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"979","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"274","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}