{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T04:03:50Z","timestamp":1750133030394,"version":"3.41.0"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319541839"},{"type":"electronic","value":"9783319541846"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-54184-6_2","type":"book-chapter","created":{"date-parts":[[2017,3,9]],"date-time":"2017-03-09T15:44:25Z","timestamp":1489074265000},"page":"19-34","source":"Crossref","is-referenced-by-count":0,"title":["Learning Action Concept Trees and Semantic Alignment Networks from Image-Description Data"],"prefix":"10.1007","author":[{"given":"Jiyang","family":"Gao","sequence":"first","affiliation":[]},{"given":"Ram","family":"Nevatia","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,3,10]]},"reference":[{"key":"2_CR1","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: NIPS (2012)"},{"key":"2_CR2","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. In: ICLR (2015)"},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Krishna, R., Zhu, Y., Groth, O., Johnson, J., Hata, K., Kravitz, J., Chen, S., Kalantidis, Y., Li, L.J., Shamma, D.A., Bernstein, M., Fei-Fei, L.: Visual genome: connecting language and vision using crowdsourced dense image annotations (2016)","DOI":"10.1007\/s11263-016-0981-7"},{"key":"2_CR4","doi-asserted-by":"crossref","first-page":"3343","DOI":"10.1016\/j.patcog.2014.04.018","volume":"47","author":"G Guo","year":"2014","unstructured":"Guo, G., Lai, A.: A survey on still image based human action recognition. Pattern Recogn. 47, 3343\u20133361 (2014)","journal-title":"Pattern Recogn."},{"key":"2_CR5","doi-asserted-by":"crossref","unstructured":"Oquab, M., Bottou, L., Laptev, I., Sivic, J.: Learning and transferring mid-level image representations using convolutional neural networks. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.222"},{"key":"2_CR6","unstructured":"Gkioxari, G., Hariharan, B., Girshick, R., Malik, J.: R-CNNs for pose estimation and action detection. arXiv preprint arxiv:1406.5212 (2014)"},{"key":"2_CR7","doi-asserted-by":"crossref","unstructured":"Gkioxari, G., Girshick, R., Malik, J.: Contextual action recognition with R*CNN. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.129"},{"key":"2_CR8","doi-asserted-by":"crossref","unstructured":"Chao, Y.W., Wang, Z., He, Y., Wang, J., Deng, J.: Hico: A benchmark for recognizing human-object interactions in images. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.122"},{"key":"2_CR9","doi-asserted-by":"crossref","unstructured":"Ramanathan, V., Li, C., Deng, J., Han, W., Li, Z., Gu, K., Song, Y., Bengio, S., Rossenberg, C., Fei-Fei, L.: Learning semantic relationships for better action retrieval in images. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298713"},{"key":"2_CR10","doi-asserted-by":"crossref","unstructured":"Rohrbach, A., Rohrbach, M., Tandon, N., Schiele, B.: A dataset for movie description. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298940"},{"key":"2_CR11","unstructured":"Torabi, A., Pal, C., Larochelle, H., Courville, A.: Using descriptive video services to create a large data source for video annotation research. arXiv preprint arxiv:1503.01070 (2015)"},{"key":"2_CR12","doi-asserted-by":"crossref","unstructured":"Alayrac, J.B., Bojanowski, P., Agrawal, N., Sivic, J., Laptev, I., Lacoste-Julien, S.: Learning from narrated instruction videos (2016)","DOI":"10.1109\/CVPR.2016.495"},{"key":"2_CR13","doi-asserted-by":"crossref","unstructured":"Ramanathan, V., Liang, P., Fei-Fei, L.: Video event understanding using natural language descriptions. In: ICCV (2013)","DOI":"10.1109\/ICCV.2013.117"},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Yu, S.I., Jiang, L., Hauptmann, A.: Instructional videos for unsupervised harvesting and learning of action examples. In: ACM MM (2014)","DOI":"10.1145\/2647868.2654997"},{"key":"2_CR15","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Jhuang, H., Garrote, E., Poggio, T., Serre, T.: HMDB: a large video database for human motion recognition. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"2_CR16","doi-asserted-by":"crossref","first-page":"971","DOI":"10.1007\/s00138-012-0450-4","volume":"24","author":"KK Reddy","year":"2013","unstructured":"Reddy, K.K., Shah, M.: Recognizing 50 human action categories of web videos. Mach. Vis. Appl. 24, 971\u2013981 (2013)","journal-title":"Mach. Vis. Appl."},{"key":"2_CR17","doi-asserted-by":"crossref","unstructured":"Sun, C., Gan, C., Nevatia, R.: Automatic concept discovery from parallel text and visual corpora. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.298"},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Gao, J., Sun, C., Nevatia, R.: ACD: action concept discovery from image-sentence corpora. In: ICMR (2016)","DOI":"10.1145\/2911996.2912014"},{"key":"2_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Heidelberg (2014). doi: 10.1007\/978-3-319-10602-1_48"},{"key":"2_CR20","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1162\/tacl_a_00166","volume":"2","author":"P Young","year":"2014","unstructured":"Young, P., Lai, A., Hodosh, M., Hockenmaier, J.: From image descriptions to visual denotations: new similarity metrics for semantic inference over event descriptions. TACL 2, 67\u201378 (2014)","journal-title":"TACL"},{"key":"2_CR21","doi-asserted-by":"crossref","unstructured":"Donahue, J., Anne Hendricks, L., Guadarrama, S., Rohrbach, M., Venugopalan, S., Saenko, K., Darrell, T.: Long-term recurrent convolutional networks for visual recognition and description. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., Erhan, D.: Show and tell: a neural image caption generator. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"2_CR23","doi-asserted-by":"crossref","unstructured":"Karpathy, A., Fei-Fei, L.: Deep visual-semantic alignments for generating image descriptions. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"2_CR24","doi-asserted-by":"crossref","unstructured":"Hu, R., Xu, H., Rohrbach, M., Feng, J., Saenko, K., Darrell, T.: Natural language object retrieval. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.493"},{"key":"2_CR25","doi-asserted-by":"crossref","unstructured":"Hu, R., Rohrbach, M., Darrell, T.: Segmentation from natural language expressions. arXiv preprint arxiv:1603.06180 (2016)","DOI":"10.1007\/978-3-319-46448-0_7"},{"key":"2_CR26","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1145\/219717.219748","volume":"38","author":"GA Miller","year":"1995","unstructured":"Miller, G.A.: Wordnet: a lexical database for english. Commun. ACM 38, 39\u201341 (1995)","journal-title":"Commun. ACM"},{"key":"2_CR27","doi-asserted-by":"crossref","unstructured":"Cao, S., Chen, K., Nevatia, R.: Abstraction hierarchy and self annotation update for fine grained activity recognition. In: WACV (2016)","DOI":"10.1109\/WACV.2016.7477583"},{"key":"2_CR28","unstructured":"Abadi, M., Agarwal, A., Barham, P., Brevdo, E., Chen, Z., Citro, C., Corrado, G.S., Davis, A., Dean, J., Devin, M., Ghemawat, S., Goodfellow, I., Harp, A., Irving, G., Isard, M., Jia, Y., Jozefowicz, R., Kaiser, L., Kudlur, M., Levenberg, J., Man\u00e9, D., Monga, R., Moore, S., Murray, D., Olah, C., Schuster, M., Shlens, J., Steiner, B., Sutskever, I., Talwar, K., Tucker, P., Vanhoucke, V., Vasudevan, V., Vi\u00e9gas, F., Vinyals, O., Warden, P., Wattenberg, M., Wicke, M., Yu, Y., Zheng, X.: TensorFlow: large-scale machine learning on heterogeneous systems (2015). https:\/\/www.tensorflow.org\/"},{"key":"2_CR29","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"2_CR30","unstructured":"Mikolov, T., Sutskever, I., Chen, K., Corrado, G.S., Dean, J.: Distributed representations of words and phrases and their compositionality. In: NIPS (2013)"},{"key":"2_CR31","unstructured":"Kingma, D., Ba, J.: Adam: a method for stochastic optimization. In: ICLR (2015)"},{"key":"2_CR32","first-page":"1871","volume":"9","author":"RE Fan","year":"2008","unstructured":"Fan, R.E., Chang, K.W., Hsieh, C.J., Wang, X.R., Lin, C.J.: LIBLINEAR: a library for large linear classification. JMLR 9, 1871\u20131874 (2008)","journal-title":"JMLR"},{"key":"2_CR33","unstructured":"Frome, A., Corrado, G.S., Shlens, J., Bengio, S., Dean, J., Mikolov, T., et al.: Devise: a deep visual-semantic embedding model. In: NIPS (2013)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2016"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-54184-6_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,16]],"date-time":"2025-06-16T14:25:49Z","timestamp":1750083949000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-54184-6_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319541839","9783319541846"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-54184-6_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]}}}