{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T09:02:06Z","timestamp":1758272526337,"version":"3.40.3"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030110178"},{"type":"electronic","value":"9783030110185"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-11018-5_28","type":"book-chapter","created":{"date-parts":[[2019,1,24]],"date-time":"2019-01-24T05:50:50Z","timestamp":1548309050000},"page":"306-316","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Temporal Attention Mechanism with Conditional Inference for Large-Scale Multi-label Video Classification"],"prefix":"10.1007","author":[{"given":"Eun-Sol","family":"Kim","sequence":"first","affiliation":[]},{"given":"Kyoung-Woon","family":"On","sequence":"additional","affiliation":[]},{"given":"Jongseok","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Yu-Jung","family":"Heo","sequence":"additional","affiliation":[]},{"given":"Seong-Ho","family":"Choi","sequence":"additional","affiliation":[]},{"given":"Hyun-Dong","family":"Lee","sequence":"additional","affiliation":[]},{"given":"Byoung-Tak","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,1,23]]},"reference":[{"key":"28_CR1","unstructured":"Abu-El-Haija, S., Kothari, N., Lee, J., Natsev, P., Toderici, G., Varadarajan, B., Vijayanarasimhan, S.: Youtube-8m: a large-scale video classification benchmark. arXiv preprint arXiv:1609.08675 (2016)"},{"key":"28_CR2","doi-asserted-by":"crossref","unstructured":"Arandjelovic, R., Gronat, P., Torii, A., Pajdla, T., Sivic, J.: Netvlad: CNN architecture for weakly supervised place recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5297\u20135307 (2016)","DOI":"10.1109\/CVPR.2016.572"},{"key":"28_CR3","doi-asserted-by":"crossref","unstructured":"Arandjelovic, R., Zisserman, A.: All about VLAD. In: Proceedings of the IEEE conference on Computer Vision and Pattern Recognition, pp. 1578\u20131585 (2013)","DOI":"10.1109\/CVPR.2013.207"},{"key":"28_CR4","doi-asserted-by":"crossref","unstructured":"Cho, K., et al.: Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078 (2014)","DOI":"10.3115\/v1\/D14-1179"},{"key":"28_CR5","unstructured":"Chung, J., Ahn, S., Bengio, Y.: Hierarchical multiscale recurrent neural networks. arXiv preprint arXiv:1609.01704 (2016)"},{"key":"28_CR6","unstructured":"Chung, J., Gulcehre, C., Cho, K., Bengio, Y.: Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555 (2014)"},{"key":"28_CR7","first-page":"279","volume":"10","author":"K Dembczynski","year":"2010","unstructured":"Dembczynski, K., Cheng, W., H\u00fcllermeier, E.: Bayes optimal multilabel classification via probabilistic classifier chains. ICML. 10, 279\u2013286 (2010)","journal-title":"ICML."},{"key":"28_CR8","doi-asserted-by":"crossref","unstructured":"Fukui, A., Park, D.H., Yang, D., Rohrbach, A., Darrell, T., Rohrbach, M.: Multimodal compact bilinear pooling for visual question answering and visual grounding. arXiv preprint arXiv:1606.01847 (2016)","DOI":"10.18653\/v1\/D16-1044"},{"key":"28_CR9","doi-asserted-by":"crossref","unstructured":"Hershey, S., et al.: CNN architectures for large-scale audio classification. In: International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2017). https:\/\/arxiv.org\/abs\/1609.09430","DOI":"10.1109\/ICASSP.2017.7952132"},{"issue":"8","key":"28_CR10","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"28_CR11","unstructured":"Kim, J.H., On, K.W., Lim, W., Kim, J., Ha, J.W., Zhang, B.T.: Hadamard product for low-rank bilinear pooling. arXiv preprint arXiv:1610.04325 (2016)"},{"key":"28_CR12","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"28_CR13","unstructured":"Miech, A., Laptev, I., Sivic, J.: Learnable pooling with context gating for video classification. arXiv preprint arXiv:1706.06905 (2017)"},{"key":"28_CR14","unstructured":"Na, S., Yu, Y., Lee, S., Kim, J., Kim, G.: Encoding video and label priors for multi-label video classification on youtube-8m dataset. arXiv preprint arXiv:1706.07960 (2017)"},{"key":"28_CR15","unstructured":"Nam, J., Menc\u00eda, E.L., Kim, H.J., F\u00fcrnkranz, J.: Maximizing subset accuracy with recurrent neural networks in multi-label classification. In: Advances in Neural Information Processing Systems, pp. 5413\u20135423 (2017)"},{"key":"28_CR16","doi-asserted-by":"crossref","unstructured":"Philbin, J., Chum, O., Isard, M., Sivic, J., Zisserman, A.: Object retrieval with large vocabularies and fast spatial matching. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2007, pp. 1\u20138. IEEE (2007)","DOI":"10.1109\/CVPR.2007.383172"},{"key":"28_CR17","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"254","DOI":"10.1007\/978-3-642-04174-7_17","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"J Read","year":"2009","unstructured":"Read, J., Pfahringer, B., Holmes, G., Frank, E.: Classifier chains for multi-label classification. In: Buntine, W., Grobelnik, M., Mladeni\u0107, D., Shawe-Taylor, J. (eds.) ECML PKDD 2009, Part II. LNCS (LNAI), vol. 5782, pp. 254\u2013269. Springer, Heidelberg (2009). https:\/\/doi.org\/10.1007\/978-3-642-04174-7_17"},{"issue":"3","key":"28_CR18","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1007\/s10994-011-5256-5","volume":"85","author":"J Read","year":"2011","unstructured":"Read, J., Pfahringer, B., Holmes, G., Frank, E.: Classifier chains for multi-label classification. Mach. Learn. 85(3), 333 (2011)","journal-title":"Mach. Learn."},{"key":"28_CR19","doi-asserted-by":"crossref","unstructured":"Sivic, J., Zisserman, A.: Video Google: a text retrieval approach to object matching in videos. In: null, p. 1470. IEEE (2003)","DOI":"10.1109\/ICCV.2003.1238663"},{"key":"28_CR20","unstructured":"Tensorflow: Tensorflow: image recognition. https:\/\/www.tensorflow.org\/tutorials\/images\/image_recognition"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-11018-5_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,22]],"date-time":"2023-01-22T01:21:17Z","timestamp":1674350477000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-11018-5_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030110178","9783030110185"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-11018-5_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"23 January 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}