{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T02:51:45Z","timestamp":1764557505760},"reference-count":72,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2019,5,27]],"date-time":"2019-05-27T00:00:00Z","timestamp":1558915200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,5,27]],"date-time":"2019-05-27T00:00:00Z","timestamp":1558915200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2019,9]]},"DOI":"10.1007\/s11263-019-01184-2","type":"journal-article","created":{"date-parts":[[2019,5,27]],"date-time":"2019-05-27T11:02:45Z","timestamp":1558954965000},"page":"1321-1344","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Click Carving: Interactive Object Segmentation in Images and Videos with Point Clicks"],"prefix":"10.1007","volume":"127","author":[{"given":"Suyog Dutt","family":"Jain","sequence":"first","affiliation":[]},{"given":"Kristen","family":"Grauman","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,5,27]]},"reference":[{"key":"1184_CR1","doi-asserted-by":"crossref","unstructured":"Acuna, D., Ling, H., Kar, A., & Fidler, S. (2018). Efficient interactive annotation of segmentation datasets with polygon-rnn++.","DOI":"10.1109\/CVPR.2018.00096"},{"key":"1184_CR2","doi-asserted-by":"crossref","unstructured":"Arbel\u00e1ez, P., Pont-Tuset, J., Barron, J., Marques, F., & Malik, J. (2014). Multiscale combinatorial grouping. In CVPR.","DOI":"10.1109\/CVPR.2014.49"},{"key":"1184_CR3","doi-asserted-by":"crossref","unstructured":"Badrinarayanan, V., Galasso, F., & Cipolla, R. (2010). Label propagation in video sequences. In CVPR.","DOI":"10.1109\/CVPR.2010.5540054"},{"key":"1184_CR4","doi-asserted-by":"crossref","unstructured":"Bai, X., & Sapiro, G. (2007). Distancecut: Interactive segmentation and matting of images and videos. In 2007 IEEE international conference on image processing.","DOI":"10.1109\/ICIP.2007.4379139"},{"key":"1184_CR5","doi-asserted-by":"crossref","unstructured":"Bai, X., Wang, J., Simons, D., & Sapiro, G. (2009) Video snapcut: Robust video object cutout using localized classifiers. In SIGGRAPH.","DOI":"10.1145\/1576246.1531376"},{"key":"1184_CR6","doi-asserted-by":"crossref","unstructured":"Batra, D., Kowdle, A., Parikh, D., Luo, J., & Chen, T. (2010). iCoseg: Interactive co-segmentation with intelligent scribble guidance. In CVPR.","DOI":"10.1109\/CVPR.2010.5540080"},{"key":"1184_CR7","unstructured":"Bearman, A., Russakovsky, O., Ferrari, V., & Fei-Fei, L. (2015). What\u2019s the point: Semantic segmentation with point supervision. ArXiv e-prints."},{"key":"1184_CR8","doi-asserted-by":"crossref","unstructured":"Bell, S., Upchurch, P., Snavely, N., & Bala, K. (2015). Material recognition in the wild with the materials in context database. In Computer Vision and Pattern Recognition (CVPR).","DOI":"10.1109\/CVPR.2015.7298970"},{"key":"1184_CR9","doi-asserted-by":"crossref","unstructured":"Boykov, Y., & Jolly, M. (2001). Interactive graph cuts for optimal boundary and region segmentation of objects in N-D images. In CVPR.","DOI":"10.1109\/ICCV.2001.937505"},{"issue":"7","key":"1184_CR10","doi-asserted-by":"publisher","first-page":"1312","DOI":"10.1109\/TPAMI.2011.231","volume":"34","author":"J Carreira","year":"2012","unstructured":"Carreira, J., & Sminchisescu, C. (2012). CPMC: Automatic object segmentation using constrained parametric min-cuts. PAMI, 34(7), 1312\u20131328.","journal-title":"PAMI"},{"key":"1184_CR11","doi-asserted-by":"crossref","unstructured":"Castrej\u00f3n, L., Kundu, K., Urtasun, R., & Fidler, S. (2017). Annotating object instances with a polygon-rnn. In CVPR.","DOI":"10.1109\/CVPR.2017.477"},{"key":"1184_CR12","unstructured":"Chen, L.-C., Papandreou, G., Kokkinos, I., Murphy, K., & Yuille, A.\u00a0L. (2015). Semantic image segmentation with deep convolutional nets and fully connected crfs. In ICLR."},{"key":"1184_CR13","doi-asserted-by":"crossref","unstructured":"Cheng, M.-M., Zhang, G.-X., Mitra, N.\u00a0J., Huang, X., & Hu, S.-M. (2011). Global contrast based salient region detection. In CVPR (pp. 409\u2013416).","DOI":"10.1109\/CVPR.2011.5995344"},{"key":"1184_CR14","doi-asserted-by":"crossref","unstructured":"Cordts, M., Omran, M., Ramos, S., Rehfeld, T., Enzweiler, M., Benenson, R., Franke, U., Roth, S., & Schiele, B. (2016). The cityscapes dataset for semantic urban scene understanding. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.350"},{"key":"1184_CR15","doi-asserted-by":"crossref","unstructured":"Faktor, A., & Irani, M. (2014). Video segmentation by non-local consensus voting. In Proceedings of the British machine vision conference. BMVA Press.","DOI":"10.5244\/C.28.21"},{"key":"1184_CR16","doi-asserted-by":"crossref","unstructured":"Fathi, A., Balcan, M., Ren, X., & Rehg, J. (2011). Combining self training and active learning for video segmentation. In BMVC.","DOI":"10.5244\/C.25.78"},{"key":"1184_CR17","doi-asserted-by":"crossref","unstructured":"Fragkiadaki, K., Arbelaez, P., Felsen, P., & Malik, J. (2015). Learning to segment moving objects in videos. In CVPR.","DOI":"10.1109\/CVPR.2015.7299035"},{"key":"1184_CR18","doi-asserted-by":"crossref","unstructured":"Galasso, F., Nagaraja, N. S., Cardenas, T. J., Brox, T., & Schiele, B. (2013). A unified video segmentation benchmark: Annotation, metrics and analysis. In ICCV.","DOI":"10.1109\/ICCV.2013.438"},{"key":"1184_CR19","doi-asserted-by":"crossref","unstructured":"Godec, M., Roth, P.\u00a0M., & Bischof, H. (2011). Hough-based tracking of non-rigid objects. In ICCV.","DOI":"10.1109\/ICCV.2011.6126228"},{"key":"1184_CR20","doi-asserted-by":"crossref","unstructured":"Grundmann, M., Kwatra, V., Han, M., & Essa, I. (2010). Efficient hierarchical graph based video segmentation. In CVPR.","DOI":"10.1109\/CVPR.2010.5539893"},{"key":"1184_CR21","doi-asserted-by":"crossref","unstructured":"Gulshan, V., Rother, C., Criminisi, A., Blake, A., & Zisserman, A. (2010). Geodesic star convexity for interactive image segmentation. In CVPR.","DOI":"10.1109\/CVPR.2010.5540073"},{"key":"1184_CR22","doi-asserted-by":"crossref","unstructured":"Jain, S., & Grauman, K. (2013). Predicting sufficient annotation strength for interactive foreground segmentation. In ICCV.","DOI":"10.1109\/ICCV.2013.166"},{"key":"1184_CR23","doi-asserted-by":"crossref","unstructured":"Jain, S.\u00a0D., & Grauman, K. (2014). Supervoxel-consistent foreground propagation in video. In ECCV 2014. Lecture notes in computer science (pp. 656\u2013671). Springer.","DOI":"10.1007\/978-3-319-10593-2_43"},{"key":"1184_CR24","doi-asserted-by":"crossref","unstructured":"Jain, S.\u00a0D., & Grauman, K. (2016). Click carving: Segmenting objects in video with point clicks. In AAAI conference on human computation and crowdsourcing (HCOMP).","DOI":"10.1609\/hcomp.v4i1.13288"},{"key":"1184_CR25","doi-asserted-by":"crossref","unstructured":"Jiang, B., Zhang, L., Lu, H., Yang, C., & Yang, M.-H. (2013). Saliency detection via absorbing markov chain. In ICCV.","DOI":"10.1109\/ICCV.2013.209"},{"key":"1184_CR26","doi-asserted-by":"crossref","unstructured":"Karasev, V., Ravichandran, A., & Soatto, S. (2014). Active frame, location, and detector selection for automated and manual video annotation. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2014.273"},{"key":"1184_CR27","doi-asserted-by":"crossref","unstructured":"Kass, M., Witkin, A., & Terzopoulos, D. (1988). Snakes: Active contour models. In IJCV (pp. 321\u2013331).","DOI":"10.1007\/BF00133570"},{"issue":"3","key":"1184_CR28","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1007\/s11263-012-0537-4","volume":"100","author":"P Kohli","year":"2012","unstructured":"Kohli, P., Nickisch, H., Rother, C., & Rhemann, C. (2012). User-centric learning and evaluation of interactive segmentation systems. IJCV, 100(3), 261\u2013274.","journal-title":"IJCV"},{"key":"1184_CR29","unstructured":"Kr\u00e4henb\u00fchl, P., & Koltun, V. (2014). In Computer vision\u2014ECCV 2014: 13th European conference, Zurich, Switzerland, September 6\u201312, 2014, proceedings, part V, chapter geodesic object proposals (pp. 725\u2013739). Cham: Springer."},{"key":"1184_CR30","unstructured":"Krause, A., & Guestrin, C. (2007). Near-optimal observation selection using submodular functions. In National conference on artificial intelligence (AAAI), nectar track."},{"key":"1184_CR31","doi-asserted-by":"crossref","unstructured":"Lee, Y. J., Kim, J., & Grauman, K. (2011). Key-segments for video object segmentation. In ICCV.","DOI":"10.1109\/ICCV.2011.6126471"},{"key":"1184_CR32","doi-asserted-by":"crossref","unstructured":"Lempitsky, V. S., Kohli, P., Rother, C., & Sharp, T. (2009). Image segmentation with a bounding box prior. In ICCV","DOI":"10.1109\/ICCV.2009.5459262"},{"key":"1184_CR33","unstructured":"Levinkov, E., Tompkin, J., Bonneel, N., Kirchhoff, S., Andres, B., & Pfister, H. (2016). Interactive multicut video segmentation. In Proceedings of the 24th Pacific conference on computer graphics and applications: Short papers (pp. 33\u201338)."},{"key":"1184_CR34","doi-asserted-by":"crossref","unstructured":"Li, F., Kim, T., Humayun, A., Tsai, D., & Rehg, J.\u00a0M. (2013). Video segmentation by tracking many figure-ground segments. In ICCV.","DOI":"10.1109\/ICCV.2013.273"},{"issue":"8","key":"1184_CR35","first-page":"3919","volume":"25","author":"X Li","year":"2016","unstructured":"Li, X., Zhao, L., Wei, L., Yang, M.-H., Fei, W., Zhuang, Y., et al. (2016). DeepSaliency: Multi-task deep neural network model for salient object detection. IEEE TIP, 25(8), 3919\u20133930.","journal-title":"IEEE TIP"},{"key":"1184_CR36","doi-asserted-by":"crossref","unstructured":"Li, Y., Hou, X., Koch, C., Rehg, J.\u00a0M., & Yuille, A.\u00a0L. (2014). The secrets of salient object segmentation. In CVPR.","DOI":"10.1109\/CVPR.2014.43"},{"key":"1184_CR37","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., & Zitnick, C. L. (2014). Microsoft COCO: Common objects in context. In ECCV.","DOI":"10.1007\/978-3-319-10602-1_48"},{"issue":"2","key":"1184_CR38","doi-asserted-by":"publisher","first-page":"353","DOI":"10.1109\/TPAMI.2010.70","volume":"33","author":"T Liu","year":"2011","unstructured":"Liu, T., Yuan, Z., Sun, J., Wang, J., Zheng, N., Tang, X., et al. (2011). Learning to detect a salient object. PAMI, 33(2), 353\u2013367.","journal-title":"PAMI"},{"key":"1184_CR39","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., & Darrell, T. (2015). Fully convolutional networks for semantic segmentation. In CVPR.","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"1184_CR40","unstructured":"Ma, T., & Latecki, L. (2012). Maximum weight cliques with mutex constraints for video object segmentation. In CVPR."},{"key":"1184_CR41","doi-asserted-by":"crossref","unstructured":"Malisiewicz, T., & Efros, A.\u00a0A. (2007). Spatial support for objects via multiple segmentations. In BMVC.","DOI":"10.5244\/C.21.55"},{"key":"1184_CR42","doi-asserted-by":"crossref","unstructured":"Malmberg, F., Strand, R., & Nystr\u00f6m, I. (2011). Generalized hard constraints for graph segmentation. In SCIA.","DOI":"10.1007\/978-3-642-21227-7_4"},{"issue":"2","key":"1184_CR43","doi-asserted-by":"publisher","first-page":"434","DOI":"10.1016\/j.patcog.2009.03.008","volume":"43","author":"K McGuinness","year":"2010","unstructured":"McGuinness, K., & O\u2019Connor, N. E. (2010). A comparative evaluation of interactive segmentation algorithms. Pattern Recognition, 43(2), 434\u2013444. Interactive Imaging and Vision.","journal-title":"Pattern Recognition"},{"key":"1184_CR44","doi-asserted-by":"crossref","unstructured":"Mortensen, E., & Barrett, W. (1995). Intelligent scissors for image composition. In SIGGRAPH.","DOI":"10.1145\/218380.218442"},{"key":"1184_CR45","unstructured":"Nickisch, H., Rother, C., Kohli, P., & Rhemann, C. (2010). Learning an interactive segmentation system. In Proceedings of the seventh Indian conference on computer vision, graphics and image processing, ICVGIP \u201910 (pp. 274\u2013281). New York, NY: ACM."},{"key":"1184_CR46","doi-asserted-by":"crossref","unstructured":"Noh, H., Hong, S., & Han, B. (2015). Learning deconvolution network for semantic segmentation. In 2015 IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2015.178"},{"key":"1184_CR47","doi-asserted-by":"crossref","unstructured":"Oneata, D., Revaud, J., Verbeek, J., & Schmid, C. (2014). Spatio-temporal object detection proposals. In ECCV.","DOI":"10.1007\/978-3-319-10578-9_48"},{"key":"1184_CR48","doi-asserted-by":"crossref","unstructured":"Papadopoulos, D., Uijlings, J., Keller, F., & Ferrari, V. (2017). Training object class detectors with click supervision. In CVPR.","DOI":"10.1109\/CVPR.2017.27"},{"key":"1184_CR49","doi-asserted-by":"crossref","unstructured":"Papazoglou, A., & Ferrari, V. (2013). Fast object segmentation in unconstrained video. In ICCV.","DOI":"10.1109\/ICCV.2013.223"},{"key":"1184_CR50","doi-asserted-by":"crossref","unstructured":"Perazzi, F., Kr\u00e4henb\u00fchl, P., Pritch, Y., & Hornung, A. (2012). Saliency filters: Contrast based filtering for salient region detection. In CVPR (pp. 733\u2013740).","DOI":"10.1109\/CVPR.2012.6247743"},{"key":"1184_CR51","unstructured":"Pinheiro, P. O., Collobert, R., & Doll\u00e1r, P. (2015). Learning to segment object candidates. In NIPS"},{"key":"1184_CR52","doi-asserted-by":"crossref","unstructured":"Pont-Tuset, J., Farr\u00e9, M. A., & Smolic, A. (2015). Semi-automatic video object segmentation by advanced manipulation of segmentation hierarchies. In International workshop on content-based multimedia indexing (CBMI).","DOI":"10.1109\/CBMI.2015.7153600"},{"key":"1184_CR53","doi-asserted-by":"crossref","unstructured":"Ren, X., & Malik, J. (2007). Tracking as repeated figure\/ground segmentation. In CVPR.","DOI":"10.1109\/CVPR.2007.383177"},{"key":"1184_CR54","doi-asserted-by":"crossref","unstructured":"Rother, C., Kolmogorov, V., & Blake, A. (2004). Grabcut-interactive foreground extraction using iterated graph cuts. In SIGGRAPH.","DOI":"10.1145\/1015706.1015720"},{"key":"1184_CR55","doi-asserted-by":"crossref","unstructured":"Russakovsky, O., Li, L.-J., & Fei-Fei, L. (2015). Best of both worlds: Human\u2013machine collaboration for object annotation. In CVPR.","DOI":"10.1109\/CVPR.2015.7298824"},{"key":"1184_CR56","unstructured":"Shankar\u00a0Nagaraja, N., Schmidt, F.\u00a0R., & Brox, T. (2015). Video segmentation with just a few strokes. In ICCV."},{"key":"1184_CR57","doi-asserted-by":"crossref","unstructured":"Sundberg, P., Brox, T., Maire, M., Arbelaez, P., & Malik, J. (2011). Occlusion boundary detection and figure\/ground assignment from optical flow. In CVPR, Washington, DC, USA.","DOI":"10.1109\/CVPR.2011.5995364"},{"key":"1184_CR58","doi-asserted-by":"crossref","unstructured":"Tsai, D., Flagg, M., & Rehg, J. (2010). Motion coherent tracking with multi-label mrf optimization. In BMVC.","DOI":"10.5244\/C.24.56"},{"key":"1184_CR59","unstructured":"The OpenCV reference manual, 2.4.9.0 edition, April 2014."},{"issue":"2","key":"1184_CR60","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/s11263-013-0620-5","volume":"104","author":"JRR Uijlings","year":"2013","unstructured":"Uijlings, J. R. R., van de Sande, K. E. A., Gevers, T., & Smeulders, A. W. M. (2013). Selective search for object recognition. International Journal of Computer Vision, 104(2), 154\u2013171.","journal-title":"International Journal of Computer Vision"},{"key":"1184_CR61","doi-asserted-by":"crossref","unstructured":"Vijayanarasimhan, S., & Grauman, K. (2012). Active frame selection for label propagation in videos. In ECCV.","DOI":"10.1007\/978-3-642-33715-4_36"},{"key":"1184_CR62","unstructured":"Vondrick, C., & Ramanan, D. (2011). Video annotation and tracking with active learning. In NIPS."},{"issue":"3","key":"1184_CR63","doi-asserted-by":"publisher","first-page":"585","DOI":"10.1145\/1073204.1073233","volume":"24","author":"J Wang","year":"2005","unstructured":"Wang, J., Bhat, P., Colburn, A., Agrawala, M., & Cohen, M. F. (2005). Interactive video cutout. ACM Transactions on Graphics, 24(3), 585\u2013594.","journal-title":"ACM Transactions on Graphics"},{"key":"1184_CR64","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1016\/j.cviu.2013.10.013","volume":"120","author":"T Wang","year":"2014","unstructured":"Wang, T., Han, B., & Collomosse, J. (2014). Touchcut: Fast image and video segmentation using single-touch interaction. Computer Vision and Image Understanding, 120, 14\u201330.","journal-title":"Computer Vision and Image Understanding"},{"key":"1184_CR65","doi-asserted-by":"crossref","unstructured":"Weinzaepfel, P., Revaud, J., Harchaoui, Z., & Schmid, C. (2015). Learning to detect motion boundaries. In CVPR 2015, Boston, United States.","DOI":"10.1109\/CVPR.2015.7298873"},{"key":"1184_CR66","doi-asserted-by":"crossref","unstructured":"Wen, L., Du, D., Lei, Z., Li, S.\u00a0Z., & Yang, M.-H. (2015). Jots: Joint online tracking and segmentation. In CVPR.","DOI":"10.1109\/CVPR.2015.7298835"},{"key":"1184_CR67","doi-asserted-by":"crossref","unstructured":"Wu, Z., Li, F., Sukthankar, R., & Rehg, J.\u00a0M. (2015). Robust video segment proposals with painless occlusion handling. In CVPR.","DOI":"10.1109\/CVPR.2015.7299047"},{"key":"1184_CR68","doi-asserted-by":"crossref","unstructured":"Xu, N., Price, B.\u00a0L., Cohen, S., Yang, J., & Huang, T.\u00a0S. (2016). Deep interactive object selection. CVPR (pp. 373\u2013381).","DOI":"10.1109\/CVPR.2016.47"},{"key":"1184_CR69","doi-asserted-by":"crossref","unstructured":"Yu, G., & Yuan, J. (2015). Fast action proposals for human action detection and search. In CVPR.","DOI":"10.1109\/CVPR.2015.7298735"},{"key":"1184_CR70","doi-asserted-by":"crossref","unstructured":"Zhang, D., Javed, O., & Shah, M. (2013). Video object segmentation through spatially accurate and temporally dense extraction of primary object regions. In CVPR.","DOI":"10.1109\/CVPR.2013.87"},{"key":"1184_CR71","doi-asserted-by":"crossref","unstructured":"Zhao, R., Ouyang, W., Li, H., & Wang, X. (2015). Saliency detection by multi-context learning. In CVPR.","DOI":"10.1109\/CVPR.2015.7298731"},{"key":"1184_CR72","doi-asserted-by":"crossref","unstructured":"Zheng, S., Jayasumana, S., Romera-Paredes, B., Vineet, V., Su, Z., Du, D., et al. (2015). Conditional random fields as recurrent neural networks.","DOI":"10.1109\/ICCV.2015.179"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-019-01184-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-019-01184-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-019-01184-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,18]],"date-time":"2024-07-18T18:07:30Z","timestamp":1721326050000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-019-01184-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,5,27]]},"references-count":72,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2019,9]]}},"alternative-id":["1184"],"URL":"https:\/\/doi.org\/10.1007\/s11263-019-01184-2","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,5,27]]},"assertion":[{"value":"27 December 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 May 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 May 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}