{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,28]],"date-time":"2025-10-28T18:33:00Z","timestamp":1761676380505},"reference-count":64,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2016,4,25]],"date-time":"2016-04-25T00:00:00Z","timestamp":1461542400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2016,9]]},"DOI":"10.1007\/s11263-016-0906-5","type":"journal-article","created":{"date-parts":[[2016,4,25]],"date-time":"2016-04-25T15:09:35Z","timestamp":1461596975000},"page":"272-290","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":37,"title":["LIBSVX: A Supervoxel Library and Benchmark for Early Video Processing"],"prefix":"10.1007","volume":"119","author":[{"given":"Chenliang","family":"Xu","sequence":"first","affiliation":[]},{"given":"Jason J.","family":"Corso","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,4,25]]},"reference":[{"key":"906_CR1","doi-asserted-by":"crossref","unstructured":"Achanta, R., Shaji, A., Smith, K., Lucchi, A., Fua, P. & Susstrunk, S. (2012). Slic superpixels compared to state-of-the-art superpixel methods. IEEE Transactions on Pattern Analysis and Machine Intelligence.","DOI":"10.1109\/TPAMI.2012.120"},{"issue":"5","key":"906_CR2","doi-asserted-by":"crossref","first-page":"898","DOI":"10.1109\/TPAMI.2010.161","volume":"33","author":"P Arbelaez","year":"2011","unstructured":"Arbelaez, P., Maire, M., Fowlkes, C., & Malik, J. (2011). Contour detection and hierarchical image segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 33(5), 898\u2013916.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"1","key":"906_CR3","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s11263-010-0390-2","volume":"92","author":"S Baker","year":"2011","unstructured":"Baker, S., Scharstein, D., Lewis, J., Roth, S., Black, M. J., & Szeliski, R. (2011). A database and evaluation methodology for optical flow. International Journal of Computer Vision, 92(1), 1\u201331.","journal-title":"International Journal of Computer Vision"},{"key":"906_CR4","doi-asserted-by":"crossref","unstructured":"Brendel, W. & Todorovic, S. (2009) Video object segmentation by tracking regions. In IEEE International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2009.5459242"},{"key":"906_CR5","doi-asserted-by":"crossref","unstructured":"Brostow, G.J., Shotton, J., Fauqueur, J., Cipolla, R. (2008). Segmentation and recognition using structure from motion point clouds. In European Conference on Computer Vision.","DOI":"10.1007\/978-3-540-88682-2_5"},{"key":"906_CR6","doi-asserted-by":"crossref","unstructured":"Budvytis, I. & Badrinarayanan, V., Cipolla, R. (2011). Semi-supervised video segmentation using tree structured graphical models. In IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2011.5995600"},{"key":"906_CR7","doi-asserted-by":"crossref","unstructured":"Chang, J., Wei, D. & III J. W. F. (2013). A video representation using temporal superpixels. In IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2013.267"},{"key":"906_CR8","doi-asserted-by":"crossref","unstructured":"Chen, A. Y. C. & Corso, J. J. (2010). Propagating multi-class pixel labels throughout video frames. In Proceedings of Western New York Image Processing Workshop.","DOI":"10.1109\/WNYIPW.2010.5649773"},{"issue":"5","key":"906_CR9","doi-asserted-by":"crossref","first-page":"603","DOI":"10.1109\/34.1000236","volume":"24","author":"D Comaniciu","year":"2002","unstructured":"Comaniciu, D., & Meer, P. (2002). Mean shift: A robust approach toward feature space analysis. IEEE Transactions on Pattern Analysis and Machine Intelligence, 24(5), 603\u2013619.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"5","key":"906_CR10","doi-asserted-by":"crossref","first-page":"629","DOI":"10.1109\/TMI.2007.912817","volume":"27","author":"JJ Corso","year":"2008","unstructured":"Corso, J. J., Sharon, E., Dube, S., El-Saden, S., Sinha, U., & Yuille, A. (2008). Efficient multilevel brain tumor segmentation with integrated bayesian model classification. IEEE Transactions on Medical Imaging, 27(5), 629\u2013640.","journal-title":"IEEE Transactions on Medical Imaging"},{"key":"906_CR11","doi-asserted-by":"crossref","unstructured":"de\u00a0Souza, K. J. F., de\u00a0Albuquerque\u00a0Ara\u00fajo, A., et\u00a0al (2014). Graph-based hierarchical video segmentation based on a simple dissimilarity measure. Pattern Recognition Letters.","DOI":"10.1016\/j.patrec.2014.02.016"},{"key":"906_CR12","unstructured":"DeMenthon, D. & Megret, R. (2002). Spatio-temporal segmentation of video by hierarchical mean shift analysis. In Statistical Methods in Video Processing Workshop."},{"key":"906_CR13","doi-asserted-by":"crossref","unstructured":"Drucker, F. & MacCormick, J. (2009). Fast superpixels for video analysis. In IEEE Workshop on Motion and Video Computing.","DOI":"10.1109\/WMVC.2009.5399239"},{"issue":"7","key":"906_CR14","doi-asserted-by":"crossref","first-page":"937","DOI":"10.1109\/TIP.2004.828427","volume":"13","author":"\u00c7E Erdem","year":"2004","unstructured":"Erdem, \u00c7. E., Sankur, B., & Tekalp, A. M. (2004). Performance measures for video object segmentation and tracking. IEEE Transactions on Image Processing, 13(7), 937\u2013951.","journal-title":"IEEE Transactions on Image Processing"},{"issue":"2","key":"906_CR15","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1023\/B:VISI.0000022288.19776.77","volume":"59","author":"PF Felzenszwalb","year":"2004","unstructured":"Felzenszwalb, P. F., & Huttenlocher, D. P. (2004). Efficient graph-based image segmentation. International Journal of Computer Vision, 59(2), 167\u2013181.","journal-title":"International Journal of Computer Vision"},{"key":"906_CR16","doi-asserted-by":"crossref","unstructured":"Fowlkes, C., Belongie, S. & Malik, J. (2001) Efficient spatiotemporal grouping using the nystrom method. In IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2001.990481"},{"issue":"2","key":"906_CR17","doi-asserted-by":"crossref","first-page":"214","DOI":"10.1109\/TPAMI.2004.1262185","volume":"26","author":"C Fowlkes","year":"2004","unstructured":"Fowlkes, C., Belongie, S., Chung, F., & Malik, J. (2004). Spectral grouping using the nystr\u00f6m method. IEEE Transactions on Pattern Analysis and Machine Intelligence, 26(2), 214\u2013225.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"1","key":"906_CR18","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1109\/TIT.1975.1055330","volume":"21","author":"K Fukunaga","year":"1975","unstructured":"Fukunaga, K., & Hostetler, L. (1975). The estimation of the gradient of a density function, with applications in pattern recognition. IEEE Transactions on Information Theory, 21(1), 32\u201340.","journal-title":"IEEE Transactions on Information Theory"},{"key":"906_CR19","unstructured":"Galasso, F., Cipolla, R. & Schiele, B. (2012). Video segmentation with superpixels. In Asian Conference on Computer Vision."},{"key":"906_CR20","doi-asserted-by":"crossref","unstructured":"Galasso, F., Nagaraja, N. S., Cardenas, T. J., Brox, T. & Schiele, B. (2013). A unified video segmentation benchmark: Annotation, metrics and analysis. In IEEE International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2013.438"},{"key":"906_CR21","doi-asserted-by":"crossref","unstructured":"Gould, S., Fulton, R. & Koller, D. (2009). Decomposing a scene into geometric and semantically consistent regions. In IEEE International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2009.5459211"},{"issue":"3","key":"906_CR22","doi-asserted-by":"crossref","first-page":"384","DOI":"10.1109\/TPAMI.2004.1262334","volume":"26","author":"H Greenspan","year":"2004","unstructured":"Greenspan, H., Goldberger, J., & Mayer, A. (2004). Probabilistic space-time video modeling via piecewise gmm. IEEE Transactions on Pattern Analysis and Machine Intelligence, 26(3), 384\u2013396.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"906_CR23","doi-asserted-by":"crossref","unstructured":"Grundmann M., Kwatra V., Han M. & Essa I. (2010). Efficient hierarchical graph-based video segmentation. In IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2010.5539893"},{"key":"906_CR24","series-title":"Image Analysis and Applications","first-page":"178","volume-title":"Progress in Pattern Recognition","author":"A Hanbury","year":"2008","unstructured":"Hanbury, A. (2008). How do superpixels affect image segmentation? Progress in Pattern Recognition (pp. 178\u2013186)., Image Analysis and Applications Berlin: Springer."},{"key":"906_CR25","doi-asserted-by":"crossref","unstructured":"He, X., Zemel, R. S. & Ray, D. (2006). Learning and incorporating top-down cues in image segmentation. In European Conference on Computer Vision.","DOI":"10.1007\/11744023_27"},{"key":"906_CR26","doi-asserted-by":"crossref","first-page":"577","DOI":"10.1145\/1073204.1073232","volume":"24","author":"D Hoiem","year":"2005","unstructured":"Hoiem, D., Efros, A. A., & Hebert, M. (2005). Automatic photo pop-up. ACM Transactions on Graphics, 24, 577\u2013584.","journal-title":"ACM Transactions on Graphics"},{"key":"906_CR27","doi-asserted-by":"crossref","unstructured":"Khan, S. & Shah, M. (2001). Object based segmentation of video using color, motion and spatial information. In IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2001.991039"},{"key":"906_CR28","doi-asserted-by":"crossref","unstructured":"Kl\u00e4ser, A., Marsza\u0142ek, M. & Schmid, C. (2008). A spatio-temporal descriptor based on 3d-gradients. In British Machine Vision Conference.","DOI":"10.5244\/C.22.99"},{"issue":"2","key":"906_CR29","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1007\/s11263-005-1838-7","volume":"64","author":"I Laptev","year":"2005","unstructured":"Laptev, I. (2005). On space-time interest points. International Journal of Computer Vision, 64(2), 107\u2013123.","journal-title":"International Journal of Computer Vision"},{"key":"906_CR30","unstructured":"Lee, J. & Choi, S. (2014). Incremental tree-based inference with dependent normalized random measures. In Proceedings of the Seventeenth International Conference on Artificial Intelligence and Statistics (pp. 558\u2013566)."},{"key":"906_CR31","doi-asserted-by":"crossref","unstructured":"Lee, Y. J., Kim, J., Grauman, K. (2011). Key-segments for video object segmentation. In IEEE International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2011.6126471"},{"issue":"12","key":"906_CR32","doi-asserted-by":"crossref","first-page":"2290","DOI":"10.1109\/TPAMI.2009.96","volume":"31","author":"A Levinshtein","year":"2009","unstructured":"Levinshtein, A., Stere, A., Kutulakos, K. N., Fleet, D. J., Dickinson, S. J., & Siddiqi, K. (2009). Turbopixels: Fast superpixels using geometric flows. IEEE Transactions on Pattern Analysis and Machine Intelligence, 31(12), 2290\u20132297.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"906_CR33","doi-asserted-by":"crossref","unstructured":"Li, F., Kim, T., Humayun, A., Tsai, D., Rehg, J. M. (2013). Video segmentation by tracking many figure-ground segments. In IEEE International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2013.273"},{"key":"906_CR34","doi-asserted-by":"crossref","unstructured":"Liu, C., Freeman, W. T., Adelson, E. H., Weiss, Y. (2008a). Human-assisted motion annotation. In IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2008.4587845"},{"key":"906_CR35","doi-asserted-by":"crossref","unstructured":"Liu, M.Y., Tuzel, O., Ramalingam, S., Chellappa, R. (2011). Entropy rate superpixel segmentation. In IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2011.5995323"},{"key":"906_CR36","unstructured":"Liu, S., Dong, G., Yan, C. H. & Ong, S. H. (2008b). Video segmentation: Propagation, validation and aggregation of a preceding graph. In IEEE Conference on Computer Vision and Pattern Recognition."},{"key":"906_CR37","doi-asserted-by":"crossref","unstructured":"Megret, R., & DeMenthon, D. (2002). A survey of spatio-temporal grouping techniques. UMD: Technical report.","DOI":"10.21236\/ADA459242"},{"key":"906_CR38","doi-asserted-by":"crossref","unstructured":"Moore, A.P., Prince, S., Warrell, J., Mohammed, U. & Jones, G. (2008). Superpixel lattices. In IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2008.4587471"},{"key":"906_CR39","unstructured":"Mori, G., Ren, X., Efros, A. A. & Malik, J. (2004). Recovering human body configurations: Combining segmentation and recognition. In IEEE Conference on Computer Vision and Pattern Recognition."},{"key":"906_CR40","doi-asserted-by":"crossref","unstructured":"Palou G. & Salembier, P. (2013) Hierarchical video representation with trajectory binary partition tree. In IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2013.273"},{"key":"906_CR41","doi-asserted-by":"crossref","unstructured":"Paris S. (2008) Edge-preserving smoothing and mean-shift segmentation of video streams. In European Conference on Computer Vision.","DOI":"10.1007\/978-3-540-88688-4_34"},{"key":"906_CR42","doi-asserted-by":"crossref","unstructured":"Paris S. & Durand, F. (2007). A topological approach to hierarchical segmentation using mean shift. In IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2007.383228"},{"issue":"4","key":"906_CR43","doi-asserted-by":"crossref","first-page":"583","DOI":"10.1016\/S0031-3203(96)00114-8","volume":"30","author":"NV Patel","year":"1997","unstructured":"Patel, N. V., & Sethi, I. K. (1997). Video shot detection and characterization for video databases. Pattern Recognition, 30(4), 583\u2013592.","journal-title":"Pattern Recognition"},{"key":"906_CR44","doi-asserted-by":"crossref","unstructured":"Ren, X. & Malik, J. (2003) Learning a classification model for segmentation. In IEEE International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2003.1238308"},{"key":"906_CR45","doi-asserted-by":"crossref","unstructured":"Reso, M., Jachalsky, J., Rosenhahn, B. & Ostermann, J. (2013). Temporally consistent superpixels. In IEEE International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2013.55"},{"issue":"5","key":"906_CR46","doi-asserted-by":"crossref","first-page":"530","DOI":"10.1109\/34.589215","volume":"19","author":"C Schmid","year":"1997","unstructured":"Schmid, C., & Mohr, R. (1997). Local grayvalue invariants for image retrieval. IEEE Transactions on Pattern Analysis and Machine Intelligence, 19(5), 530\u2013535.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"906_CR47","doi-asserted-by":"crossref","unstructured":"Sharon, E., Brandt, A. & Basri, R. (2000). Fast multiscale image segmentation. In IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2000.855801"},{"issue":"7104","key":"906_CR48","doi-asserted-by":"crossref","first-page":"810","DOI":"10.1038\/nature04977","volume":"442","author":"E Sharon","year":"2006","unstructured":"Sharon, E., Galun, M., Sharon, D., Basri, R., & Brandt, A. (2006). Hierarchy and adaptivity in segmenting visual scenes. Nature, 442(7104), 810\u2013813.","journal-title":"Nature"},{"key":"906_CR49","doi-asserted-by":"crossref","unstructured":"Shi, J., & Malik, J. (2000). Normalized cuts and image segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22(8), 888\u2013905.","DOI":"10.1109\/34.868688"},{"issue":"1","key":"906_CR50","doi-asserted-by":"crossref","first-page":"2","DOI":"10.1007\/s11263-007-0109-1","volume":"81","author":"J Shotton","year":"2009","unstructured":"Shotton, J., Winn, J., Rother, C., & Criminisi, A. (2009). Textonboost for image understanding: Multi-class object recognition and segmentation by jointly modeling texture, layout, and context. International Journal of Computer Vision, 81(1), 2\u201323.","journal-title":"International Journal of Computer Vision"},{"key":"906_CR51","doi-asserted-by":"crossref","unstructured":"Sundberg, P., Brox, T., Maire, M., Arbel\u00e1ez, P., Malik, J. (2011). Occlusion boundary detection and figure\/ground assignment from optical flow. In IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2011.5995364"},{"key":"906_CR52","doi-asserted-by":"crossref","unstructured":"Tighe, J. & Lazebnik, S. (2010). Superparsing: Scalable nonparametric image parsing with superpixels. International Journal of Computer Vision.","DOI":"10.1007\/978-3-642-15555-0_26"},{"key":"906_CR53","doi-asserted-by":"crossref","unstructured":"Tripathi, S., Hwang, Y., Belongie, S. & Nguyen, T. (2014). Improving streaming video segmentation with early and mid-level visual processing. In IEEE Winter Conference on Applications of Computer Vision.","DOI":"10.1109\/WACV.2014.6836063"},{"key":"906_CR54","doi-asserted-by":"crossref","unstructured":"Tsai, D., Flagg, M. & Rehg, J. M. (2010) Motion coherent tracking with multi-label mrf optimization. In British Machine Vision Conference.","DOI":"10.5244\/C.24.56"},{"key":"906_CR55","unstructured":"Van\u00a0den Bergh, M., Roig, G., Boix, X., Manen, S. & Van\u00a0Gool, L. (2013). Online video seeds for temporal window objectness. In IEEE International Conference on Computer Vision."},{"key":"906_CR56","doi-asserted-by":"crossref","unstructured":"Vazquez-Reina, A., Avidan, S., Pfister, H. & Miller, E. (2010). Multiple hypothesis video segmentation from superpixel flows. In European Conference on Computer Vision.","DOI":"10.1007\/978-3-642-15555-0_20"},{"key":"906_CR57","doi-asserted-by":"crossref","unstructured":"Veksler, O., Boykov, Y., Mehrani, P. (2010) Superpixels and supervoxels in an energy optimization framework. In European Conference on Computer Vision.","DOI":"10.1007\/978-3-642-15555-0_16"},{"issue":"6","key":"906_CR58","doi-asserted-by":"crossref","first-page":"583","DOI":"10.1109\/34.87344","volume":"13","author":"L Vincent","year":"1991","unstructured":"Vincent, L., & Soille, P. (1991). Watersheds in digital spaces: An efficient algorithm based on immersion simulations. IEEE Transactions on Pattern Analysis and Machine Intelligence, 13(6), 583\u2013598.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"906_CR59","doi-asserted-by":"crossref","unstructured":"Wang, H., Kl\u00e4ser, A., Schmid, C., Liu, C. L. (2013). Dense trajectories and motion boundary descriptors for action recognition. International Journal of Computer Vision.","DOI":"10.1007\/s11263-012-0594-8"},{"key":"906_CR60","doi-asserted-by":"crossref","unstructured":"Wang, J., Thiesson, B., Xu, Y., & Cohen, M. (2004). Image and video segmentation by anisotropic kernel mean shift. European Conference on Computer Vision","DOI":"10.1007\/978-3-540-24671-8_19"},{"key":"906_CR61","unstructured":"Xu, C., Corso, J. J. (2012). Evaluation of super-voxel methods for early video processing. In IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"906_CR62","doi-asserted-by":"crossref","unstructured":"Xu, C., Xiong, C. & Corso, J. J. (2012). Streaming hierarchical video segmentation. In European Conference on Computer Vision.","DOI":"10.1007\/978-3-642-33783-3_45"},{"key":"906_CR63","doi-asserted-by":"crossref","unstructured":"Xu, C., Whitt, S. & Corso, J. J. (2013). Flattening supervoxel hierarchies by the uniform entropy slice. In IEEE International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2013.279"},{"key":"906_CR64","doi-asserted-by":"crossref","unstructured":"Zeng, G., Wang, P., Wang, J., Gan, R. & Zha, H. (2011). Structure-sensitive superpixels via geodesic distance. In IEEE International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2011.6126274"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-016-0906-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-016-0906-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-016-0906-5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,7]],"date-time":"2019-09-07T00:50:39Z","timestamp":1567817439000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-016-0906-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,4,25]]},"references-count":64,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2016,9]]}},"alternative-id":["906"],"URL":"https:\/\/doi.org\/10.1007\/s11263-016-0906-5","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,4,25]]}}}