{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,12]],"date-time":"2026-02-12T09:48:18Z","timestamp":1770889698317,"version":"3.50.1"},"reference-count":88,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2019,5,13]],"date-time":"2019-05-13T00:00:00Z","timestamp":1557705600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2019,5,13]],"date-time":"2019-05-13T00:00:00Z","timestamp":1557705600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100006595","name":"UEFISCDI","doi-asserted-by":"crossref","award":["PN-III-P4-ID-ERC-2016-0007"],"award-info":[{"award-number":["PN-III-P4-ID-ERC-2016-0007"]}],"id":[{"id":"10.13039\/501100006595","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100006595","name":"UEFISCDI","doi-asserted-by":"crossref","award":["PN-III-P2-2.1-PED-2016-1842"],"award-info":[{"award-number":["PN-III-P2-2.1-PED-2016-1842"]}],"id":[{"id":"10.13039\/501100006595","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100006595","name":"UEFISCDI","doi-asserted-by":"crossref","award":["PN-III-P1-1.2-PCCDI-2017-0734"],"award-info":[{"award-number":["PN-III-P1-1.2-PCCDI-2017-0734"]}],"id":[{"id":"10.13039\/501100006595","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2019,9]]},"DOI":"10.1007\/s11263-019-01183-3","type":"journal-article","created":{"date-parts":[[2019,5,14]],"date-time":"2019-05-14T02:35:56Z","timestamp":1557801356000},"page":"1279-1302","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":39,"title":["Unsupervised Learning of Foreground Object Segmentation"],"prefix":"10.1007","volume":"127","author":[{"given":"Ioana","family":"Croitoru","sequence":"first","affiliation":[]},{"given":"Simion-Vlad","family":"Bogolin","sequence":"additional","affiliation":[]},{"given":"Marius","family":"Leordeanu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,5,13]]},"reference":[{"key":"1183_CR1","unstructured":"Abadi, M., Agarwal, A., Barham, P., Brevdo, E., Chen, Z., Citro, C., et al. (2015). Tensorflow: Large-scale machine learning on heterogeneous systems. Software available from \n                              https:\/\/www.tensorflow.org\/"},{"key":"1183_CR2","doi-asserted-by":"crossref","unstructured":"Agrawal, P., Carreira, J., & Malik, J. (2015). Learning to see by moving. In Proceedings of the IEEE international conference on computer vision (pp. 37\u201345).","DOI":"10.1109\/ICCV.2015.13"},{"key":"1183_CR3","doi-asserted-by":"crossref","unstructured":"Alexe, B., Deselaers, T., & Ferrari, V. (2010). What is an object? In CVPR.","DOI":"10.1109\/CVPR.2010.5540226"},{"issue":"6","key":"1183_CR4","doi-asserted-by":"publisher","first-page":"1709","DOI":"10.1109\/TIP.2010.2101613","volume":"20","author":"O Barnich","year":"2011","unstructured":"Barnich, O., & Van Droogenbroeck, M. (2011). Vibe: A universal background subtraction algorithm for video sequences. IEEE Transactions on Image processing, 20(6), 1709\u20131724.","journal-title":"IEEE Transactions on Image processing"},{"key":"1183_CR5","doi-asserted-by":"crossref","unstructured":"Bau, D., Zhou, B., Khosla, A., Oliva, A., & Torralba, A. (2017). Network dissection: Quantifying interpretability of deep visual representations. In International conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.354"},{"key":"1183_CR6","doi-asserted-by":"crossref","unstructured":"Bengio, Y., Louradour, J., Collobert, R., & Weston, J. (2009). Curriculum learning. In Proceedings of the 26th annual international conference on machine learning (pp. 41\u201348). ACM.","DOI":"10.1145\/1553374.1553380"},{"key":"1183_CR7","doi-asserted-by":"crossref","unstructured":"Borji, A., Sihite, D., & Itti, L. (2012). Salient object detection: a benchmark. In ECCV.","DOI":"10.1007\/978-3-642-33709-3_30"},{"key":"1183_CR8","doi-asserted-by":"crossref","unstructured":"Chen, X., Shrivastava, A., & Gupta, A. (2014). Enriching visual knowledge bases via object discovery and segmentation. In CVPR.","DOI":"10.1109\/CVPR.2014.261"},{"key":"1183_CR9","doi-asserted-by":"crossref","unstructured":"Cheng, J., Tsai, Y. H., Wang, S., & Yang, M. H. (2017). Segflow: Joint learning for video object segmentation and optical flow. In The IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.81"},{"issue":"3","key":"1183_CR10","doi-asserted-by":"publisher","first-page":"569","DOI":"10.1109\/TPAMI.2014.2345401","volume":"37","author":"M Cheng","year":"2015","unstructured":"Cheng, M., Mitra, N., Huang, X., Torr, P., & Hu, S. (2015). Global contrast based salient region detection. PAMI, 37(3), 569\u2013582.","journal-title":"PAMI"},{"key":"1183_CR11","doi-asserted-by":"crossref","unstructured":"Cho, M., Kwak, S., Schmid, C., & Ponce, J. (2015). Unsupervised object discovery and localization in the wild: Part-based matching with bottom-up region proposals. In CVPR.","DOI":"10.1109\/CVPR.2015.7298724"},{"key":"1183_CR12","doi-asserted-by":"crossref","unstructured":"Croitoru, I., Bogolin, S.V., & Leordeanu, M. (2017). Unsupervised learning from video to detect foreground objects in single images. In 2017 IEEE international conference on computer vision (ICCV) (pp. 4345\u20134353). IEEE.","DOI":"10.1109\/ICCV.2017.465"},{"issue":"10","key":"1183_CR13","doi-asserted-by":"publisher","first-page":"1337","DOI":"10.1109\/TPAMI.2003.1233909","volume":"25","author":"R Cucchiara","year":"2003","unstructured":"Cucchiara, R., Grana, C., Piccardi, M., & Prati, A. (2003). Detecting moving objects, ghosts, and shadows in video streams. PAMI, 25(10), 1337\u20131342.","journal-title":"PAMI"},{"issue":"3","key":"1183_CR14","doi-asserted-by":"publisher","first-page":"275","DOI":"10.1007\/s11263-012-0538-3","volume":"100","author":"T Deselaers","year":"2012","unstructured":"Deselaers, T., Alexe, B., & Ferrari, V. (2012). Weakly supervised localization and learning with generic knowledge. IJCV, 100(3), 275\u2013293.","journal-title":"IJCV"},{"key":"1183_CR15","doi-asserted-by":"crossref","unstructured":"Doersch, C., Gupta, A., & Efros, A. A. (2015). Unsupervised visual representation learning by context prediction. In Proceedings of the IEEE international conference on computer vision (pp. 1422\u20131430).","DOI":"10.1109\/ICCV.2015.167"},{"key":"1183_CR16","unstructured":"Donahue, J., Kr\u00e4henb\u00fchl, P., & Darrell, T. (2016). Adversarial feature learning. arXiv preprint \n                              arXiv:1605.09782\n                              \n                           ."},{"key":"1183_CR17","doi-asserted-by":"crossref","unstructured":"Dutt Jain, S., Xiong, B., & Grauman, K. (2017). Fusionseg: Learning to combine motion and appearance for fully automatic segmentation of generic objects in videos. In The IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.228"},{"issue":"1","key":"1183_CR18","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham, M., Eslami, S. M. A., Van Gool, L., Williams, C. K. I., Winn, J., & Zisserman, A. (2015). The pascal visual object classes challenge: A retrospective. International Journal of Computer Vision, 111(1), 98\u2013136.","journal-title":"International Journal of Computer Vision"},{"issue":"2","key":"1183_CR19","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C. K. I., Winn, J., & Zisserman, A. (2010). The PASCAL visual object classes (voc) challenge. International Journal of Computer Vision, 88(2), 303\u2013338.","journal-title":"International Journal of Computer Vision"},{"key":"1183_CR20","unstructured":"Finn, C., Goodfellow, I., & Levine, S. (2016). Unsupervised learning for physical interaction through video prediction. In Advances in neural information processing systems (pp. 64\u201372)"},{"key":"1183_CR21","doi-asserted-by":"crossref","unstructured":"Girshick, R. (2015). Fast R-CNN. In Proceedings of the IEEE international conference on computer vision (pp. 1440\u20131448).","DOI":"10.1109\/ICCV.2015.169"},{"key":"1183_CR22","unstructured":"Goroshin, R., Mathieu, M. F., & LeCun, Y. (2015). Learning to linearize under uncertainty. In Advances in neural information processing systems (pp. 1234\u20131242)."},{"key":"1183_CR23","doi-asserted-by":"crossref","unstructured":"Haller, E., & Leordeanu, M. (2017). Unsupervised object segmentation in video by efficient selection of highly probable positive features. In The IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.544"},{"key":"1183_CR24","doi-asserted-by":"crossref","unstructured":"Hou, X., & Zhang, L. (2007). Saliency detection: A spectral residual approach. In CVPR.","DOI":"10.1109\/CVPR.2007.383267"},{"issue":"3","key":"1183_CR25","doi-asserted-by":"publisher","first-page":"264","DOI":"10.1145\/331499.331504","volume":"31","author":"AK Jain","year":"1999","unstructured":"Jain, A. K., Murty, M. N., & Flynn, P. J. (1999). Data clustering: A review. ACM Computing Surveys, 31(3), 264\u2013323.","journal-title":"ACM Computing Surveys"},{"key":"1183_CR26","doi-asserted-by":"crossref","unstructured":"J\u00e9gou, S., Drozdzal, M., Vazquez, D., Romero, A., & Bengio, Y. (2017). The one hundred layers tiramisu: Fully convolutional densenets for semantic segmentation. In 2017 IEEE conference on computer vision and pattern recognition workshops (CVPRW) (pp. 1175\u20131183). IEEE.","DOI":"10.1109\/CVPRW.2017.156"},{"key":"1183_CR27","doi-asserted-by":"crossref","unstructured":"Jiang, H., Wang, J., Yuan, Z., Wu, Y., Zheng, N., & Li, S. (2013). Salient object detection: A discriminative regional feature integration approach. In CVPR.","DOI":"10.1109\/CVPR.2013.271"},{"key":"1183_CR28","doi-asserted-by":"crossref","unstructured":"Joulin, A., Bach, F., & Ponce, J. (2010). Discriminative clustering for image co-segmentation. In CVPR.","DOI":"10.1109\/CVPR.2010.5539868"},{"key":"1183_CR29","doi-asserted-by":"crossref","unstructured":"Joulin, A., Bach, F., & Ponce, J. (2012). Multi-class cosegmentation. In CVPR.","DOI":"10.1109\/CVPR.2012.6247719"},{"key":"1183_CR30","doi-asserted-by":"crossref","unstructured":"Joulin, A., Tang, K., & Fei-Fei, L. (2014). Efficient image and video co-localization with Frank\u2013Wolfe algorithm. In ECCV.","DOI":"10.1007\/978-3-319-10599-4_17"},{"key":"1183_CR31","unstructured":"Jun\u00a0Koh, Y., Jang, W.D., & Kim, C. S. (2016). Pod: Discovering primary objects in videos based on evolutionary refinement of object recurrence, background, and primary object models. In CVPR."},{"issue":"11","key":"1183_CR32","doi-asserted-by":"publisher","first-page":"2327","DOI":"10.1109\/TPAMI.2016.2551239","volume":"38","author":"V Kalogeiton","year":"2016","unstructured":"Kalogeiton, V., Ferrari, V., & Schmid, C. (2016). Analysing domain shift factors between videos and images for object detection. PAMI, 38(11), 2327\u20132334.","journal-title":"PAMI"},{"key":"1183_CR33","doi-asserted-by":"crossref","unstructured":"Khoreva, A., Benenson, R., Hosang, J.H., Hein, M., & Schiele, B. (2017). Simple does it: Weakly supervised instance and semantic segmentation. In CVPR (Vol. 1, p. 3).","DOI":"10.1109\/CVPR.2017.181"},{"key":"1183_CR34","unstructured":"Kim, G., Xing, E., Fei-Fei, L., & Kanade, T. (2011). Distributed cosegmentation via submodular optimization on anisotropic diffusion. In ICCV."},{"key":"1183_CR35","unstructured":"Kingma, D., & Ba, J. (2014). Adam: A method for stochastic optimization. arXiv preprint \n                              arXiv:1412.6980\n                              \n                           ."},{"key":"1183_CR36","unstructured":"Kr\u00e4henb\u00fchl, P., Doersch, C., Donahue, J., & Darrell, T. (2015). Data-dependent initializations of convolutional neural networks. arXiv preprint \n                              arXiv:1511.06856\n                              \n                           ."},{"key":"1183_CR37","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems (pp. 1097\u20131105)."},{"key":"1183_CR38","doi-asserted-by":"crossref","unstructured":"Kuettel, D., Guillaumin, M., & Ferrari, V. (2012). Segmentation propagation in imagenet. In ECCV.","DOI":"10.1007\/978-3-642-33786-4_34"},{"key":"1183_CR39","doi-asserted-by":"crossref","unstructured":"Larsson, G., Maire, M., & Shakhnarovich, G. (2016). Learning representations for automatic colorization. In European Conference on Computer Vision (pp. 577\u2013593). Springer, Berlin.","DOI":"10.1007\/978-3-319-46493-0_35"},{"key":"1183_CR40","doi-asserted-by":"crossref","unstructured":"Lee, H. Y., Huang, J. B., Singh, M., & Yang, M. H. (2017). Unsupervised representation learning by sorting sequences. In 2017 IEEE international conference on computer vision (ICCV) (pp. 667\u2013676). IEEE.","DOI":"10.1109\/ICCV.2017.79"},{"key":"1183_CR41","doi-asserted-by":"crossref","unstructured":"Lee, Y. J., Kim, J., & Grauman, K. (2011). Key-segments for video object segmentation. In 2011 IEEE international conference on computer vision (ICCV) (pp. 1995\u20132002). IEEE.","DOI":"10.1109\/ICCV.2011.6126471"},{"key":"1183_CR42","doi-asserted-by":"crossref","unstructured":"Leordeanu, M., Collins, R., & Hebert, M. (2005). Unsupervised learning of object features from video sequences. In CVPR.","DOI":"10.1109\/CVPR.2005.359"},{"key":"1183_CR43","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1007\/s11263-011-0442-2","volume":"96","author":"M Leordeanu","year":"2012","unstructured":"Leordeanu, M., Sukthankar, R., & Hebert, M. (2012). Unsupervised learning for graph matching. International Journal of Computer Vision, 96, 28\u201345.","journal-title":"International Journal of Computer Vision"},{"key":"1183_CR44","doi-asserted-by":"crossref","unstructured":"Li, D., Hung, W. C., Huang, J. B., Wang, S., Ahuja, N., & Yang, M. H. (2016). Unsupervised visual representation learning by graph-based consistent constraints. In ECCV.","DOI":"10.1007\/978-3-319-46493-0_41"},{"key":"1183_CR45","doi-asserted-by":"crossref","unstructured":"Li, N., Sun, B., & Yu, J. (2015). A weighted sparse coding framework for saliency detection. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 5216\u20135223).","DOI":"10.1109\/CVPR.2015.7299158"},{"key":"1183_CR46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.43","volume-title":"The secrets of salient object segmentation","author":"Y Li","year":"2014","unstructured":"Li, Y., Hou, X., Koch, C., Rehg, J. M., & Yuille, A. L. (2014). The secrets of salient object segmentation. Atlanta: Georgia Institute of Technology."},{"key":"1183_CR47","doi-asserted-by":"crossref","unstructured":"Liu, D., Chen, T. (2007) A topic-motion model for unsupervised video object discovery. In CVPR.","DOI":"10.1109\/CVPR.2007.383220"},{"key":"1183_CR48","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., & Darrell, T. (2015). Fully convolutional networks for semantic segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3431\u20133440).","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"1183_CR49","doi-asserted-by":"crossref","unstructured":"Misra, I., Zitnick, C. L., & Hebert, M. (2016). Shuffle and learn: unsupervised learning using temporal order verification. In ECCV.","DOI":"10.1007\/978-3-319-46448-0_32"},{"key":"1183_CR50","doi-asserted-by":"crossref","unstructured":"Nguyen, M., Torresani, L., La\u00a0Torre, F. D., & Rother, C. (2009). Weakly supervised discriminative localization and classification: A joint learning process. In CVPR.","DOI":"10.21236\/ADA507101"},{"key":"1183_CR51","doi-asserted-by":"crossref","unstructured":"Noroozi, M., & Favaro, P. (2016). Unsupervised learning of visual representations by solving jigsaw puzzles. In European conference on computer vision (pp. 69\u201384). Springer, Berlin.","DOI":"10.1007\/978-3-319-46466-4_5"},{"key":"1183_CR52","doi-asserted-by":"crossref","unstructured":"Owens, A., Wu, J., McDermott, J. H., Freeman, W. T., & Torralba, A. (2016). Ambient sound provides supervision for visual learning. In European conference on computer vision (pp. 801\u2013816). Springer, Berlin.","DOI":"10.1007\/978-3-319-46448-0_48"},{"key":"1183_CR53","doi-asserted-by":"crossref","unstructured":"Papazoglou, A., & Ferrari, V. (2013). Fast object segmentation in unconstrained video. In ICCV.","DOI":"10.1109\/ICCV.2013.223"},{"key":"1183_CR54","doi-asserted-by":"crossref","unstructured":"Parikh, D., & Chen, T. (2007). Unsupervised identification of multiple objects of interest from multiple images: Discover. In Asian conference on computer vision.","DOI":"10.1007\/978-3-540-76390-1_48"},{"key":"1183_CR55","doi-asserted-by":"crossref","unstructured":"Pathak, D., Girshick, R., Dollar, P., Darrell, T., & Hariharan, B. (2017). Learning features by watching objects move. In CVPR.","DOI":"10.1109\/CVPR.2017.638"},{"key":"1183_CR56","doi-asserted-by":"crossref","unstructured":"Pathak, D., Krahenbuhl, P., Donahue, J., Darrell, T., & Efros, A. A. (2016). Context encoders: Feature learning by inpainting. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 2536\u20132544).","DOI":"10.1109\/CVPR.2016.278"},{"key":"1183_CR57","doi-asserted-by":"crossref","unstructured":"Pinheiro, P.O., Lin, T. Y., Collobert, R., & Doll\u00e1r, P. (2016). Learning to refine object segments. In ECCV.","DOI":"10.1007\/978-3-319-46448-0_5"},{"key":"1183_CR58","doi-asserted-by":"crossref","unstructured":"Prest, A., Leistner, C., Civera, J., Schmid, C., & Ferrari, V. (2012). Learning object class detectors from weakly annotated video. In CVPR (pp. 3282\u20133289). IEEE.","DOI":"10.1109\/CVPR.2012.6248065"},{"key":"1183_CR59","doi-asserted-by":"crossref","unstructured":"Radenovi\u0107, F., Tolias, G., & Chum, O. (2016). CNN image retrieval learns from bow: Unsupervised fine-tuning with hard examples. In ECCV.","DOI":"10.1007\/978-3-319-46448-0_1"},{"key":"1183_CR60","unstructured":"Raiko, T., Valpola, H., & LeCun, Y. (2012). Deep learning made easier by linear transformations in perceptrons. In AISTATS (Vol. 22, pp. 924\u2013932)."},{"key":"1183_CR61","doi-asserted-by":"crossref","unstructured":"Raina, R., Battle, A., Lee, H., Packer, B., & Ng, A. Y. (2007). Self-taught learning: transfer learning from unlabeled data. In Proceedings of the 24th international conference on machine learning (pp. 759\u2013766). ACM.","DOI":"10.1145\/1273496.1273592"},{"key":"1183_CR62","doi-asserted-by":"crossref","unstructured":"Real, E., Shlens, J., Mazzocchi, S., Pan, X., & Vanhoucke, V. (2017). Youtube-boundingboxes: A large high-precision human-annotated data set for object detection in video. In 2017 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 7464\u20137473). IEEE.","DOI":"10.1109\/CVPR.2017.789"},{"key":"1183_CR63","doi-asserted-by":"crossref","unstructured":"Rochan, M., & Wang, Y. (2014). Efficient object localization and segmentation in weakly labeled videos. In Advances in visual computing (pp. 172\u2013181). Springer, Berlin.","DOI":"10.1007\/978-3-319-14249-4_17"},{"key":"1183_CR64","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1038\/scientificamerican1290-84","volume":"263","author":"I Rock","year":"1990","unstructured":"Rock, I., & Palmer, S. (1990). Gestalt psychology. Scientific American, 263, 84\u201390.","journal-title":"Scientific American"},{"key":"1183_CR65","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., & Brox, T. (2015). U-net: Convolutional networks for biomedical image segmentation. In International conference on medical image computing and computer-assisted intervention (pp. 234\u2013241). Springer, Berlin.","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"1183_CR66","doi-asserted-by":"crossref","unstructured":"Rother, C., Kolmogorov, V., & Blake, A. (2004). Grabcut: Interactive foreground extraction using iterated graph cuts. In ACM transactions on graphics (Vol. 23, pp. 309\u2013314).","DOI":"10.1145\/1015706.1015720"},{"key":"1183_CR67","doi-asserted-by":"crossref","unstructured":"Rubinstein, M., Joulin, A., Kopf, J., & Liu, C. (2013). Unsupervised joint object discovery and segmentation in internet images. In CVPR.","DOI":"10.1109\/CVPR.2013.253"},{"key":"1183_CR68","unstructured":"Rubio, J., Serrat, J., & L\u00f3pez, A. (2012). Video co-segmentation. In ACCV."},{"key":"1183_CR69","doi-asserted-by":"crossref","unstructured":"Russakovsky, O., Deng, J., Su, H., Krause, J., Satheesh, S., Ma, S., et al. (2015). Imagenet large scale visual recognition challenge. IJCV, 115(3).","DOI":"10.1007\/s11263-015-0816-y"},{"key":"1183_CR70","doi-asserted-by":"crossref","unstructured":"Siva, P., Russell, C., Xiang, T., & Agapito, L. (2013). Looking beyond the image: Unsupervised learning for object saliency and detection. In CVPR.","DOI":"10.1109\/CVPR.2013.416"},{"key":"1183_CR71","doi-asserted-by":"crossref","unstructured":"Sivic, J., Russell, B., Efros, A., Zisserman, A., & Freeman, W. (2005). Discovering objects and their location in images. In ICCV.","DOI":"10.1109\/ICCV.2005.77"},{"key":"1183_CR72","doi-asserted-by":"crossref","unstructured":"Stretcu, O., & Leordeanu, M. (2015). Multiple frames matching for object discovery in video. In BMVC.","DOI":"10.5244\/C.29.186"},{"key":"1183_CR73","doi-asserted-by":"crossref","unstructured":"Tang, K., Joulin, A., Li, L. J., & Fei-Fei, L. (2014). Co-localization in real-world images. In CVPR.","DOI":"10.1109\/CVPR.2014.190"},{"key":"1183_CR74","unstructured":"Thomee, B., Shamma, D. A., Friedland, G., Elizalde, B., Ni, K., Poland, D., Borth, D., & Li, L. J. (2015). Yfcc100m: The new data in multimedia research. arXiv preprint \n                              arXiv:1503.01817\n                              \n                           ."},{"key":"1183_CR75","unstructured":"Tokmakov, P., Alahari, K., & Schmid, C. (2016). Learning semantic segmentation with weakly-annotated videos. In ECCV (Vol. 1, p. 6)."},{"key":"1183_CR76","doi-asserted-by":"crossref","unstructured":"Tokmakov, P., Alahari, K., & Schmid, C. (2017). Learning motion patterns in videos. In The IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.64"},{"key":"1183_CR77","doi-asserted-by":"crossref","unstructured":"Tu, W. C., He, S., Yang, Q., & Chien, S. Y. (2016). Real-time salient object detection with a minimum spanning tree. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 2334\u20132342).","DOI":"10.1109\/CVPR.2016.256"},{"key":"1183_CR78","doi-asserted-by":"crossref","unstructured":"Vicente, S., Rother, C., & Kolmogorov, V. (2011). Object cosegmentation. In CVPR.","DOI":"10.1109\/CVPR.2011.5995530"},{"key":"1183_CR79","doi-asserted-by":"crossref","unstructured":"Wang, X., & Gupta, A. (2015). Unsupervised learning of visual representations using videos. In The IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2015.320"},{"key":"1183_CR80","doi-asserted-by":"crossref","unstructured":"Wei, Y., Wen, F., Zhu, W., & Sun, J. (2012). Geodesic saliency using background priors. In European conference on computer vision (pp. 29\u201342). Springer, Berlin.","DOI":"10.1007\/978-3-642-33712-3_3"},{"key":"1183_CR81","unstructured":"Xue, T., Wu, J., Bouman, K., & Freeman, B. (2016). Visual dynamics: Probabilistic future frame synthesis via cross convolutional networks. In Advances in neural information processing systems (pp. 91\u201399)."},{"key":"1183_CR82","doi-asserted-by":"crossref","unstructured":"Yang, C., Zhang, L., Lu, H., Ruan, X., & Yang, M. H. (2013). Saliency detection via graph-based manifold ranking. In 2013 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 3166\u20133173). IEEE.","DOI":"10.1109\/CVPR.2013.407"},{"key":"1183_CR83","unstructured":"Yu, F., & Koltun, V. (2015). Multi-scale context aggregation by dilated convolutions. arXiv preprint \n                              arXiv:1511.07122\n                              \n                           ."},{"key":"1183_CR84","doi-asserted-by":"crossref","unstructured":"Zhang, D., Han, J., & Zhang, Y. (2017a). Supervision by fusion: Towards unsupervised learning of deep salient object detector. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 4048\u20134056).","DOI":"10.1109\/ICCV.2017.436"},{"key":"1183_CR85","doi-asserted-by":"crossref","unstructured":"Zhang, J., Sclaroff, S., Lin, Z., Shen, X., Price, B., & Mech, R. (2015). Minimum barrier salient object detection at 80 fps. In Proceedings of the IEEE international conference on computer vision (pp. 1404\u20131412).","DOI":"10.1109\/ICCV.2015.165"},{"key":"1183_CR86","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., & Efros, A. A. (2016). Colorful image colorization. In European conference on computer vision (pp. 649\u2013666). Springer, Berlin.","DOI":"10.1007\/978-3-319-46487-9_40"},{"key":"1183_CR87","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., & Efros, A. A. (2017b). Split-brain autoencoders: Unsupervised learning by cross-channel prediction. In CVPR (Vol. 1, p. 5).","DOI":"10.1109\/CVPR.2017.76"},{"key":"1183_CR88","doi-asserted-by":"crossref","unstructured":"Zhu, W., Liang, S., Wei, Y., & Sun, J. (2014). Saliency optimization from robust background detection. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 2814\u20132821).","DOI":"10.1109\/CVPR.2014.360"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-019-01183-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-019-01183-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-019-01183-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,11]],"date-time":"2020-05-11T23:17:26Z","timestamp":1589239046000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-019-01183-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,5,13]]},"references-count":88,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2019,9]]}},"alternative-id":["1183"],"URL":"https:\/\/doi.org\/10.1007\/s11263-019-01183-3","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,5,13]]},"assertion":[{"value":"23 June 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 April 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 May 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}