{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T19:57:30Z","timestamp":1760385450707,"version":"3.41.0"},"reference-count":69,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2015,11,13]],"date-time":"2015-11-13T00:00:00Z","timestamp":1447372800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2016,6]]},"DOI":"10.1007\/s11263-015-0862-5","type":"journal-article","created":{"date-parts":[[2015,11,13]],"date-time":"2015-11-13T14:00:00Z","timestamp":1447423200000},"page":"130-150","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":25,"title":["Exploiting Privileged Information from Web Data for Action and Event Recognition"],"prefix":"10.1007","volume":"118","author":[{"given":"Li","family":"Niu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5559-8594","authenticated-orcid":false,"given":"Wen","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dong","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,11,13]]},"reference":[{"issue":"3","key":"862_CR1","doi-asserted-by":"crossref","first-page":"16","DOI":"10.1145\/1922649.1922653","volume":"43","author":"JK Aggarwal","year":"2011","unstructured":"Aggarwal, J. K., & Ryoo, M. S. (2011). Human activity analysis: A review. ACM Computing Surveys (CSUR), 43(3), 16.","journal-title":"ACM Computing Surveys (CSUR)"},{"key":"862_CR2","unstructured":"Andrews, S., Tsochantaridis, I., & Hofmann, T. (2003). Support vector machines for multiple-instance learning. In Advances in Neural Information Processing Systems (NIPS) (pp. 561\u2013568)."},{"key":"862_CR3","doi-asserted-by":"crossref","unstructured":"Baktashmotlagh, M., Harandi, M., & Brian\u00a0Lovell, M. S. (2013). Unsupervised domain adaptation by domain invariant projection. In IEEE International Conference on Computer Vision (ICCV) (pp. 769\u2013776).","DOI":"10.1109\/ICCV.2013.100"},{"key":"862_CR4","unstructured":"Bergamo, A., & Torresani, L. (2010). Exploiting weakly-labeled web images to improve object classification: a domain adaptation approach. In Advances in Neural Information Processing Systems (NIPS) (pp. 181\u2013189)."},{"issue":"1358","key":"862_CR5","doi-asserted-by":"crossref","first-page":"1257","DOI":"10.1098\/rstb.1997.0108","volume":"352","author":"AF Bobick","year":"1997","unstructured":"Bobick, A. F. (1997). Movement, activity and action: The role of knowledge in the perception of motion. Philosophical Transactions of the Royal Society B: Biological Sciences, 352(1358), 1257\u20131265.","journal-title":"Philosophical Transactions of the Royal Society B: Biological Sciences"},{"issue":"11","key":"862_CR6","doi-asserted-by":"crossref","first-page":"3641","DOI":"10.1016\/j.patcog.2014.05.007","volume":"47","author":"J Bootkrajang","year":"2014","unstructured":"Bootkrajang, J., & Kab\u00e1n, A. (2014). Learning kernel logistic regression in the presence of class label noise. Pattern Recognition, 47(11), 3641\u20133655.","journal-title":"Pattern Recognition"},{"issue":"5","key":"862_CR7","doi-asserted-by":"crossref","first-page":"770","DOI":"10.1109\/TPAMI.2009.57","volume":"32","author":"L Bruzzone","year":"2010","unstructured":"Bruzzone, L., & Marconcini, M. (2010). Domain adaptation problems: A DASVM classification technique and a circular validation strategy. T-PAMI, 32(5), 770\u2013787.","journal-title":"T-PAMI"},{"key":"862_CR8","doi-asserted-by":"crossref","unstructured":"Bunescu, R. C., & Mooney, R. J. (2007). Multiple instance learning for sparse positive bags. In International Conference on Machine learning (ICML) (pp. 105\u2013112).","DOI":"10.1145\/1273496.1273510"},{"key":"862_CR9","doi-asserted-by":"crossref","unstructured":"Chang, S. F., Ellis, D., Jiang, W., Lee, K., Yanagawa, A., Loui, A. C., & Luo, J. (2007). Large-scale multimodal semantic concept detection for consumer video. In International Workshop on Multimedia Information Retrieval (pp. 255\u2013264).","DOI":"10.1145\/1290082.1290118"},{"key":"862_CR10","doi-asserted-by":"crossref","unstructured":"Chen, L., Duan, L., & Xu, D. (2013a) Event recognition in videos by learning from heterogeneous web sources. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (pp. 2666\u20132673).","DOI":"10.1109\/CVPR.2013.344"},{"key":"862_CR11","doi-asserted-by":"crossref","unstructured":"Chen, X., Shrivastava, A., & Gupta, A. (2013b) NEIL: Extracting visual knowledge from web data. In IEEE International Conference on Computer Vision (ICCV) (pp. 1409\u20131416).","DOI":"10.1109\/ICCV.2013.178"},{"issue":"12","key":"862_CR12","doi-asserted-by":"crossref","first-page":"1931","DOI":"10.1109\/TPAMI.2006.248","volume":"28","author":"Y Chen","year":"2006","unstructured":"Chen, Y., Bi, J., & Wang, J. Z. (2006). MILES: Multiple-instance learning via embedded instance selection. T-PAMI, 28(12), 1931\u20131947.","journal-title":"T-PAMI"},{"key":"862_CR13","doi-asserted-by":"crossref","unstructured":"Chu, W. S., DelaTorre, F., & Cohn, J. (2013) Selective transfer machine for personalized facial action unit detection. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (pp. 3515\u20133522).","DOI":"10.1109\/CVPR.2013.451"},{"issue":"11","key":"862_CR14","doi-asserted-by":"crossref","first-page":"3280","DOI":"10.1109\/TIP.2011.2159227","volume":"20","author":"L Duan","year":"2011","unstructured":"Duan, L., Li, W., Tsang, I. W., & Xu, D. (2011). Improving web image search by bag-based re-ranking. T-IP, 20(11), 3280\u20133290.","journal-title":"T-IP"},{"issue":"3","key":"862_CR15","doi-asserted-by":"crossref","first-page":"465","DOI":"10.1109\/TPAMI.2011.114","volume":"34","author":"L Duan","year":"2012","unstructured":"Duan, L., Tsang, I. W., & Xu, D. (2012a). Domain transfer multiple kernel learning. T-PAMI, 34(3), 465\u2013479.","journal-title":"T-PAMI"},{"key":"862_CR16","doi-asserted-by":"crossref","unstructured":"Duan, L., Xu, D., & Chang, S. F. (2012b). Exploiting web images for event recognition in consumer videos: A multiple source domain adaptation approach. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (pp. 1338\u20131345).","DOI":"10.1109\/CVPR.2012.6247819"},{"issue":"3","key":"862_CR17","first-page":"504","volume":"23","author":"L Duan","year":"2012","unstructured":"Duan, L., Xu, D., & Tsang, I. W. (2012c). Domain adaptation from multiple sources: A domain-dependent regularization approach. T-NNLS, 23(3), 504\u2013518.","journal-title":"T-NNLS"},{"issue":"9","key":"862_CR18","doi-asserted-by":"crossref","first-page":"1667","DOI":"10.1109\/TPAMI.2011.265","volume":"34","author":"L Duan","year":"2012","unstructured":"Duan, L., Xu, D., Tsang, I. W., & Luo, J. (2012d). Visual event recognition in videos by learning from web data. T-PAMI, 34(9), 1667\u20131680.","journal-title":"T-PAMI"},{"key":"862_CR19","doi-asserted-by":"crossref","unstructured":"Farhadi, A., Endres, I., Hoiem, D., & Forsyth, D. (2009). Describing objects by their attributes. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (pp. 1778\u20131785).","DOI":"10.1109\/CVPR.2009.5206772"},{"key":"862_CR20","unstructured":"Farquhar, J. D. R., Hardoon, D. R., Meng, H., Shawe-Taylor, J., & Szedmak, S. (2005). Two view learning: SVM-2K, theory and practice. In NIPS."},{"key":"862_CR21","doi-asserted-by":"crossref","unstructured":"Fergus, R., Fei-Fei, L., Perona, P., & Zisserman, A. (2005). Learning object categories from Google\u2019s image search. In ICCV.","DOI":"10.1109\/ICCV.2005.142"},{"key":"862_CR22","doi-asserted-by":"crossref","unstructured":"Fernando, B., Habrard, A., Sebban, M., & Tuytelaars, T. (2013). Unsupervised visual domain adaptation using subspace alignment. In ICCV.","DOI":"10.1109\/ICCV.2013.368"},{"key":"862_CR23","unstructured":"Ferrari, V., & Zisserman, A. (2007). Learning visual attributes. In Advances in Neural Information Processing Systems (NIPS) (pp. 433\u2013440)."},{"issue":"7","key":"862_CR24","first-page":"1086","volume":"24","author":"S Fouad","year":"2013","unstructured":"Fouad, S., Tino, P., Raychaudhury, S., & Schneider, P. (2013). Incorporating privileged information through metric learning. T-NNLS, 24(7), 1086\u20131098.","journal-title":"T-NNLS"},{"key":"862_CR25","unstructured":"Gehler, P. V., & Nowozin, S. (2008). Infinite kernel learning.Tech. rep., Max Planck Institute for Biological Cybernetics. In NIPS Workshop on Kernel Learning: Automatic Selection of Optimal Kernels."},{"key":"862_CR26","unstructured":"Gong, B., Shi, Y., Sha, F., & Grauman, K. (2012). Geodesic flow kernel for unsupervised domain adaptation. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (pp. 2066\u20132073)."},{"key":"862_CR27","doi-asserted-by":"crossref","unstructured":"Gopalan, R., Li, R., & Chellappa, R. (2011). Domain adaptation for object recognition: An unsupervised approach. In IEEE International Conference on Computer Vision (ICCV) (pp. 999\u20131006).","DOI":"10.1109\/ICCV.2011.6126344"},{"key":"862_CR28","first-page":"723","volume":"13","author":"A Gretton","year":"2012","unstructured":"Gretton, A., Rasch, K. M., Schlkopf, B., & Smola, A. (2012). A kernel two-sample test. JMLR, 13, 723\u2013773.","journal-title":"JMLR"},{"issue":"12","key":"862_CR29","doi-asserted-by":"crossref","first-page":"2639","DOI":"10.1162\/0899766042321814","volume":"16","author":"DR Hardoon","year":"2004","unstructured":"Hardoon, D. R., Szedmak, S., & Shawe-taylor, J. (2004). Canonical correlation analysis: An overview with application to learning methods. Neural Computation, 16(12), 2639\u20132664.","journal-title":"Neural Computation"},{"key":"862_CR30","unstructured":"Hu, Y., Cao, L., Lv, F., Yan, S., Gong, Y., & Huang, T. S. (2009). Action detection in complex scenes with spatial and temporal ambiguities. In IEEE International Conference on Computer Vision (ICCV) (pp. 128\u2013135)."},{"key":"862_CR31","doi-asserted-by":"crossref","unstructured":"Huang, J., Smola, A., Gretton, A., Borgwardt, K., & Scholkopf, B. (2007). Correcting sample selection bias by unlabeled data. In Advances in Neural Information Processing Systems (NIPS) (pp. 601\u2013608).","DOI":"10.7551\/mitpress\/7503.003.0080"},{"issue":"2","key":"862_CR32","doi-asserted-by":"crossref","first-page":"134","DOI":"10.1007\/s11263-011-0494-3","volume":"100","author":"SJ Hwang","year":"2012","unstructured":"Hwang, S. J., & Grauman, K. (2012). Learning the relative importance of objects from tagged images for retrieval and cross-modal search. IJCV, 100(2), 134\u2013153.","journal-title":"IJCV"},{"key":"862_CR33","doi-asserted-by":"crossref","unstructured":"Jiang, Y. G., Ye, G., Chang, S. F., Ellis, D., & Loui, A. C. (2011). Consumer video understanding: A benchmark database and an evaluation of human and machine performance. In International Conference on Multimedia Retrieval (ICMR) (p. 29).","DOI":"10.1145\/1991996.1992025"},{"issue":"2","key":"862_CR34","doi-asserted-by":"crossref","first-page":"73","DOI":"10.1007\/s13735-012-0024-2","volume":"2","author":"YG Jiang","year":"2013","unstructured":"Jiang, Y. G., Bhattacharya, S., Chang, S. F., & Shah, M. (2013). High-level event recognition in unconstrained videos. International Journal of Multimedia Information Retrieval, 2(2), 73\u2013101.","journal-title":"International Journal of Multimedia Information Retrieval"},{"key":"862_CR35","unstructured":"Kloft, M., Brefeld, U., Sonnenburg, S., & Zien, A. (2011). $${\\ell }_\\text{ p }$$ \u2113 p -norm multiple kernel learning. JMLR, 12, 953\u2013997."},{"key":"862_CR36","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Jhuang, H., Garrote, E., Poggio, T., & Serre, T. (2011). HMDB: a large video database for human motion recognition. In IEEE International Conference on Computer Vision (ICCV) (pp. 2556\u20132563).","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"862_CR37","doi-asserted-by":"crossref","unstructured":"Kulis, B., Saenko, K., & Darrell, T. (2011). What you saw is not what you get: Domain adaptation using asymmetric kernel transforms. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (pp. 1785\u20131792).","DOI":"10.1109\/CVPR.2011.5995702"},{"key":"862_CR38","doi-asserted-by":"crossref","unstructured":"Le, Q. V., Zou, W. Y., Yeung, S. Y., & Ng, A.Y. (2011). Learning hierarchical invariant spatio-temporal features for action recognition with independent subspace analysis. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (pp. 3361\u20133368).","DOI":"10.1109\/CVPR.2011.5995496"},{"key":"862_CR39","doi-asserted-by":"crossref","unstructured":"Leung, T., Song, Y., & Zhang, J. (2011). Handling label noise in video classification via multiple instance learning. In IEEE International Conference on Computer Vision (ICCV) (pp. 2056\u20132063).","DOI":"10.1109\/ICCV.2011.6126479"},{"key":"862_CR40","doi-asserted-by":"crossref","unstructured":"Li, Q., Wu, J., & Tu, Z. (2013). Harvesting mid-level visual concepts from large-scale Internet images. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (pp. 851\u2013858).","DOI":"10.1109\/CVPR.2013.115"},{"key":"862_CR41","doi-asserted-by":"crossref","unstructured":"Li, W., Duan, L., Xu, D., & Tsang, I. W. (2011). Text-based image retrieval using progressive multi-instance learning. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (pp. 2368\u20132375).","DOI":"10.1109\/ICCV.2011.6126478"},{"key":"862_CR42","doi-asserted-by":"crossref","unstructured":"Li, W., Duan, L., Tsang, I.W., & Xu, D. (2012a). Batch mode adaptive multiple instance learning for computer vision tasks. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (pp. 2368\u20132375).","DOI":"10.1109\/CVPR.2012.6247949"},{"key":"862_CR43","doi-asserted-by":"crossref","unstructured":"Li, W., Duan, L., Tsang, I.W., & Xu, D. (2012b). Co-labeling: A new multi-view learning approach for ambiguous problems. In IEEE International Conference on Data Mining (ICDM) (pp. 419\u2013428).","DOI":"10.1109\/ICDM.2012.78"},{"issue":"6","key":"862_CR44","doi-asserted-by":"crossref","first-page":"1134","DOI":"10.1109\/TPAMI.2013.167","volume":"36","author":"W Li","year":"2014","unstructured":"Li, W., Duan, L., Xu, D., & Tsang, I. W. (2014a). Learning with augmented features for supervised and semi-supervised heterogeneous domain adaptation. T-PAMI, 36(6), 1134\u20131148.","journal-title":"T-PAMI"},{"key":"862_CR45","doi-asserted-by":"crossref","unstructured":"Li, W., Niu, L., & Xu, D. (2014b). Exploiting privileged information from web data for image categorization. In European Conference on Computer Vision (ECCV) (pp. 437\u2013452).","DOI":"10.1007\/978-3-319-10602-1_29"},{"key":"862_CR46","unstructured":"Li, Y.-F., Tsang, I. W., Kwok, J. T., & Zhou, Z.-H. (2009). Tighter and convex maximum margin clustering. In International Conference on Artificial Intelligence and Statistics (pp. 344\u2013351)."},{"key":"862_CR47","doi-asserted-by":"crossref","first-page":"766","DOI":"10.1016\/j.neunet.2009.06.030","volume":"22","author":"L Liang","year":"2009","unstructured":"Liang, L., Cai, F., & Cherkassky, V. (2009). Predictive learning with structured (grouped) data. Neural Networks, 22, 766\u2013773.","journal-title":"Neural Networks"},{"key":"862_CR48","unstructured":"Lin, Z., Jiang, Z., & Davis, L. S. (2009). Recognizing actions by shape-motion prototype trees. In IEEE International Conference on Computer Vision (ICCV) (pp. 444\u2013451)."},{"key":"862_CR49","doi-asserted-by":"crossref","unstructured":"Loui, A., Luo, J., Chang, S. F., Ellis, D., Jiang, W., Kennedy, L., Lee, K., & Yanagawa, A. (2007). Kodak\u2019s consumer video benchmark data set: concept definition and annotation. In International Workshop on Multimedia Information Retrieval (pp. 245\u2013254).","DOI":"10.1145\/1290082.1290117"},{"key":"862_CR50","doi-asserted-by":"crossref","unstructured":"Morariu, V.I., & Davis, L.S. (2011). Multi-agent event recognition in structured scenarios. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (pp. 3289\u20133296).","DOI":"10.1109\/CVPR.2011.5995386"},{"key":"862_CR51","unstructured":"Natarajan, N., Dhillon, I. S., Ravikumar, P. K., & Tewari, A. (2013). Learning with noisy labels. In Advances in Neural Information Processing Systems, pp 1196\u20131204."},{"issue":"2","key":"862_CR52","first-page":"199","volume":"22","author":"SJ Pan","year":"2011","unstructured":"Pan, S. J., Tsang, I. W., Kwok, J. T., & Yang, Q. (2011). Domain adaptation via transfer component analysis. T-NN, 22(2), 199\u2013210.","journal-title":"T-NN"},{"issue":"4","key":"862_CR53","doi-asserted-by":"crossref","first-page":"754","DOI":"10.1109\/TPAMI.2010.133","volume":"33","author":"F Schroff","year":"2011","unstructured":"Schroff, F., Criminisi, A., & Zisserman, A. (2011). Harvesting image databases from the web. T-PAMI, 33(4), 754\u2013766.","journal-title":"T-PAMI"},{"key":"862_CR54","doi-asserted-by":"crossref","unstructured":"Sharmanska, V., Quadrianto, N., Lampert, C. H. (2013). Learning to rank using privileged information. In IEEE International Conference on Computer Vision (ICCV) (pp. 825\u2013832).","DOI":"10.1109\/ICCV.2013.107"},{"key":"862_CR55","unstructured":"Shi, Y., Huang, Y., Minnen, D., Bobick, A., & Essa, I. (2004). Propagation networks for recognition of partially ordered sequential action. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (vol. 2, pp. II-862\u2013II-869)."},{"key":"862_CR56","doi-asserted-by":"crossref","unstructured":"Torralba, A., & Efros, A.A. (2011). Unbiased look at dataset bias. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (pp. 1521\u20131528).","DOI":"10.1109\/CVPR.2011.5995347"},{"issue":"11","key":"862_CR57","doi-asserted-by":"crossref","first-page":"1958","DOI":"10.1109\/TPAMI.2008.128","volume":"30","author":"A Torralba","year":"2008","unstructured":"Torralba, A., Fergus, R., & Freeman, W. T. (2008). 80 million tiny images: A large data set for nonparametric object and scene recognition. T-PAMI, 30(11), 1958\u20131970.","journal-title":"T-PAMI"},{"key":"862_CR58","doi-asserted-by":"crossref","unstructured":"Torresani, L., Szummer, M., & Fitzgibbon, A. (2010). Efficient object category recognition using classemes. In European Conference on Computer Vision (ECCV) (pp. 776\u2013789).","DOI":"10.1007\/978-3-642-15549-9_56"},{"key":"862_CR59","doi-asserted-by":"crossref","unstructured":"Tran, S. D., & Davis, L. S. (2008). Event modeling and recognition using markov logic networks. In European Conference on Computer Vision (ECCV) (pp. 610\u2013623).","DOI":"10.1007\/978-3-540-88688-4_45"},{"key":"862_CR60","doi-asserted-by":"crossref","first-page":"544","DOI":"10.1016\/j.neunet.2009.06.042","volume":"22","author":"V Vapnik","year":"2009","unstructured":"Vapnik, V., & Vashist, A. (2009). A new learning paradigm: Learning using privileged infromatin. Neural Networks, 22, 544\u2013557.","journal-title":"Neural Networks"},{"key":"862_CR61","doi-asserted-by":"crossref","unstructured":"Vijayanarasimhan, S., & Grauman, K. (2008). Keywords to visual categories: Multiple-instance learning for weakly supervised object categorization. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (pp. 1\u20138).","DOI":"10.1109\/CVPR.2008.4587632"},{"key":"862_CR62","doi-asserted-by":"crossref","unstructured":"Wang, H., & Schmid, C. (2013). Action recognition with improved trajectories. In IEEE International Conference on Computer Vision (ICCV) (pp. 3551\u20133558).","DOI":"10.1109\/ICCV.2013.441"},{"key":"862_CR63","doi-asserted-by":"crossref","unstructured":"Wang, H., Klaser, A., Schmid, C., & Liu, C. L. (2011a). Action recognition by dense trajectories. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (pp. 3169\u20133176).","DOI":"10.1109\/CVPR.2011.5995407"},{"key":"862_CR64","doi-asserted-by":"crossref","unstructured":"Wang, L., Wang, Y., & Gao, W. (2011b). Mining layered grammar rules for action recognition. International Journal of Computer Vision, 93(2), 162\u2013182.","DOI":"10.1007\/s11263-010-0393-z"},{"key":"862_CR65","doi-asserted-by":"crossref","unstructured":"Xu, D., & Chang, S. F. (2008). Video event recognition using kernel methods with multilevel temporal alignment. Pattern Analysis and Machine Intelligence, IEEE Transactions on, 30(11), 1985\u20131997.","DOI":"10.1109\/TPAMI.2008.129"},{"key":"862_CR66","doi-asserted-by":"crossref","unstructured":"Yu, T. H., Kim, T.K., & Cipolla, R. (2010). Real-time action recognition by spatiotemporal semantic and structural forests. In The British Machine Vision Conference (BMVC) (p. 52.1\u201352.12).","DOI":"10.5244\/C.24.52"},{"key":"862_CR67","doi-asserted-by":"crossref","unstructured":"Zeng, Z., & Ji, Q. (2010). Knowledge based activity recognition with dynamic bayesian network. In European Conference on Computer Vision (ECCV) (pp. 532\u2013546).","DOI":"10.1007\/978-3-642-15567-3_39"},{"key":"862_CR68","unstructured":"Zhou, Z., & Zhang, M. (2006). Multi-instance multi-label learning with application to scene classification. In Advances in neural information processing systems (NIPS) (pp. 1609\u20131616)."},{"key":"862_CR69","doi-asserted-by":"crossref","unstructured":"Zhu, G., Yang, M., Yu, K., Xu, W., & Gong, Y. (2009). Detecting video events based on action recognition in complex scenes using spatio-temporal descriptor. In Proceedings of the 17th ACM international conference on Multimedia (pp. 165\u2013174). ACM.","DOI":"10.1145\/1631272.1631297"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-015-0862-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-015-0862-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-015-0862-5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T11:44:47Z","timestamp":1748691887000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-015-0862-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,11,13]]},"references-count":69,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2016,6]]}},"alternative-id":["862"],"URL":"https:\/\/doi.org\/10.1007\/s11263-015-0862-5","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"type":"print","value":"0920-5691"},{"type":"electronic","value":"1573-1405"}],"subject":[],"published":{"date-parts":[[2015,11,13]]}}}