{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T20:03:49Z","timestamp":1743019429534,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":37,"publisher":"Springer Singapore","isbn-type":[{"type":"print","value":"9789811073045"},{"type":"electronic","value":"9789811073052"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-981-10-7305-2_10","type":"book-chapter","created":{"date-parts":[[2017,12,7]],"date-time":"2017-12-07T16:16:54Z","timestamp":1512663414000},"page":"108-119","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Scene Recognition with Sequential Object Context"],"prefix":"10.1007","author":[{"given":"Yuelian","family":"Wang","sequence":"first","affiliation":[]},{"given":"Wei","family":"Pan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,12,8]]},"reference":[{"issue":"1","key":"10_CR1","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1007\/s11263-016-0966-6","volume":"123","author":"A Agrawal","year":"2017","unstructured":"Agrawal, A., Lu, J., Antol, S., Mitchell, M., Zitnick, C.L., Parikh, D., Batra, D.: VQA: visual question answering. Int. J. Comput. Vis. 123(1), 4\u201331 (2017)","journal-title":"Int. J. Comput. Vis."},{"doi-asserted-by":"crossref","unstructured":"Bell, S., Zitnick, C.L., Bala, K., Girshick, R.: Inside-outside net: detecting objects in context with skip pooling. In: CVPR (2016)","key":"10_CR2","DOI":"10.1109\/CVPR.2016.314"},{"doi-asserted-by":"crossref","unstructured":"Byeon, W., Breuel, T.M., Raue, F., Liwicki, M.R.: Scene labeling with LSTM recurrent neural networks. In: CVPR (2015)","key":"10_CR3","DOI":"10.1109\/CVPR.2015.7298977"},{"doi-asserted-by":"crossref","unstructured":"Chatfield, K., Simonyan, K., Vedaldi, A., Zisserman, A.: Return of the devil in the details: delving deep into convolutional nets. In: Proceedings of the British Machine Vision Conference, BMVC 2014 (2014)","key":"10_CR4","DOI":"10.5244\/C.28.6"},{"issue":"7","key":"10_CR5","doi-asserted-by":"publisher","first-page":"853","DOI":"10.1016\/j.patrec.2011.12.004","volume":"33","author":"MJ Choi","year":"2012","unstructured":"Choi, M.J., Torralba, A., Willsky, A.S.: Context models and out-of-context objects. Pattern Recogn. Lett. 33(7), 853\u2013862 (2012)","journal-title":"Pattern Recogn. Lett."},{"doi-asserted-by":"crossref","unstructured":"Dixit, M., Chen, S., Gao, D., Rasiwasia, N., Vasconcelos, N.: Scene classification with semantic fisher vectors. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2015, pp. 2974\u20132983 (2015)","key":"10_CR6","DOI":"10.1109\/CVPR.2015.7298916"},{"key":"10_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"220","DOI":"10.1007\/978-3-540-74695-9_23","volume-title":"Artificial Neural Networks \u2013 ICANN 2007","author":"S Fern\u00e1ndez","year":"2007","unstructured":"Fern\u00e1ndez, S., Graves, A., Schmidhuber, J.: An application of recurrent neural networks to discriminative keyword spotting. In: de S\u00e1, J.M., Alexandre, L.A., Duch, W., Mandic, D. (eds.) ICANN 2007. LNCS, vol. 4669, pp. 220\u2013229. Springer, Heidelberg (2007). https:\/\/doi.org\/10.1007\/978-3-540-74695-9_23"},{"key":"10_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"392","DOI":"10.1007\/978-3-319-10584-0_26","volume-title":"Computer Vision \u2013 ECCV 2014","author":"Y Gong","year":"2014","unstructured":"Gong, Y., Wang, L., Guo, R., Lazebnik, S.: Multi-scale orderless pooling of deep convolutional activation features. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8695, pp. 392\u2013407. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10584-0_26"},{"doi-asserted-by":"crossref","unstructured":"Goyal, Y., Khot, T., Summers-Stay, D., Batra, D., Parikh, D.: Making the V in VQA matter: elevating the role of image understanding in Visual Question Answering. In: Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","key":"10_CR9","DOI":"10.1109\/CVPR.2017.670"},{"doi-asserted-by":"crossref","unstructured":"Herranz, L., Jiang, S., Li, X.: Scene recognition with CNNs: objects, scales and dataset bias. In: CVPR (2016)","key":"10_CR10","DOI":"10.1109\/CVPR.2016.68"},{"key":"10_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"108","DOI":"10.1007\/978-3-319-46448-0_7","volume-title":"Computer Vision \u2013 ECCV 2016","author":"R Hu","year":"2016","unstructured":"Hu, R., Rohrbach, M., Darrell, T.: Segmentation from natural language expressions. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 108\u2013124. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_7"},{"doi-asserted-by":"crossref","unstructured":"Izadinia, H., Sadeghi, F., Farhadi, A.: Incorporating scene context and object layout into appearance modeling. In: CVPR (2014)","key":"10_CR12","DOI":"10.1109\/CVPR.2014.37"},{"doi-asserted-by":"crossref","unstructured":"Jia, Y., Shelhamer, E., Donahue, J., Karayev, S., Long, J., Girshick, R., Guadarrama, S., Darrell, T.: Caffe: convolutional architecture for fast feature embedding. In: Proceedings of the 22nd ACM International Conference on Multimedia, MM 2014, pp. 675\u2013678. ACM, New York (2014)","key":"10_CR13","DOI":"10.1145\/2647868.2654889"},{"unstructured":"Li, L., Su, H., Xing, E., Fei-Fei, L.: Object bank: a high-level image representation for scene classification and semantic feature sparsification. In: Advances in Neural Information Processing Systems (2010)","key":"10_CR14"},{"doi-asserted-by":"crossref","unstructured":"Li, X., Guo, Y.: An object co-occurrence assisted hierarchical model for scene understanding. In: Proceedings of the British Machine Vision Conference (2012)","key":"10_CR15","DOI":"10.5244\/C.26.81"},{"key":"10_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"262","DOI":"10.1007\/978-3-319-48896-7_26","volume-title":"Advances in Multimedia Information Processing - PCM 2016","author":"X Li","year":"2016","unstructured":"Li, X., Herranz, L., Jiang, S.: Heterogeneous convolutional neural networks for visual recognition. In: Chen, E., Gong, Y., Tie, Y. (eds.) PCM 2016. LNCS, vol. 9917, pp. 262\u2013274. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-48896-7_26"},{"doi-asserted-by":"crossref","unstructured":"Li, X., Song, X., Herranz, L., Zhu, Y., Jiang, S.: Image captioning with both object and scene information. In: Proceedings of the 2016 ACM on Multimedia Conference, MM 2016, pp. 1107\u20131110. ACM, New York (2016)","key":"10_CR17","DOI":"10.1145\/2964284.2984069"},{"key":"10_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1007\/978-3-319-46448-0_8","volume-title":"Computer Vision \u2013 ECCV 2016","author":"X Liang","year":"2016","unstructured":"Liang, X., Shen, X., Feng, J., Lin, L., Yan, S.: Semantic object parsing with graph LSTM. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 125\u2013143. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_8"},{"unstructured":"Liao, Y., Kodagoda, S., Wang, Y., Shi, L., Liu, Y.: Understand scene categories by objects: a semantic regularized scene classifier using convolutional neural networks. In: IEEE International Conference on Robotics and Automation (ICRA) (2016)","key":"10_CR19"},{"key":"10_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"TY Lin","year":"2014","unstructured":"Lin, T.Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"10_CR21","doi-asserted-by":"publisher","first-page":"694","DOI":"10.1109\/TASLP.2016.2520371","volume":"24","author":"H Palangi","year":"2016","unstructured":"Palangi, H., Deng, L., Shen, Y., Gao, J., He, X., Chen, J., Song, X., Ward, R.: Deep sentence embedding using long short-term memory networks: analysis and application to information retrieval. IEEE\/ACM Trans. Audio Speech Lang. Process. 24, 694\u2013707 (2016)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"doi-asserted-by":"crossref","unstructured":"Quattoni, A., Torralba, A.: Recognizing indoor scenes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, CVPR Workshops 2009, pp. 413\u2013420 (2009)","key":"10_CR22","DOI":"10.1109\/CVPR.2009.5206537"},{"unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: NIPS (2015)","key":"10_CR23"},{"issue":"3","key":"10_CR24","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakvovsky","year":"2015","unstructured":"Russakvovsky, O., Deng, J., Su, H., Krause, J., Satheesh, S., Ma, S., Huang, Z., Karpathy, A., Kholsa, A., Bernstein, M., Berg, A., Fei-Fei, L.: Imagenet large scale visual recognition challenge. Int. J. Comput. Vis. 115(3), 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vis."},{"doi-asserted-by":"crossref","unstructured":"Shuai, B., Zuo, Z., Wang, G., Wang, B.: DAG-Recurrent neural networks for scene labeling. In: CVPR (2016)","key":"10_CR25","DOI":"10.1109\/CVPR.2016.394"},{"unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. In: ICLR (2015)","key":"10_CR26"},{"doi-asserted-by":"crossref","unstructured":"Torralba, A., Murphy, K.P., Freeman, W.T., Rubin, M.A.: Context-based vision system for place and object recognition. In: ICCV (2003)","key":"10_CR27","DOI":"10.1109\/ICCV.2003.1238354"},{"issue":"3","key":"10_CR28","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1561\/0600000017","volume":"3","author":"T Tuytelaars","year":"2008","unstructured":"Tuytelaars, T., Mikolajczyk, K.: Local invariant feature detectors: a survey. Found. Trends. Comput. Graph. Vis. 3(3), 177\u2013280 (2008)","journal-title":"Found. Trends. Comput. Graph. Vis."},{"key":"10_CR29","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1007\/978-3-319-46478-7_9","volume-title":"Computer Vision \u2013 ECCV 2016","author":"RR Varior","year":"2016","unstructured":"Varior, R.R., Shuai, B., Lu, J., Xu, D., Wang, G.: A siamese long short-term memory architecture for human re-identification. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9911, pp. 135\u2013153. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46478-7_9"},{"doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., Erhan, D.: Show and tell: a neural image caption generator. In: CVPR (2015)","key":"10_CR30","DOI":"10.1109\/CVPR.2015.7298935"},{"doi-asserted-by":"crossref","unstructured":"Wu, R., Wang, B., Wang, W., Yus, Y.: Harvesting discriminative meta objects with deep CNN features for scene classification. In: ICCV (2015)","key":"10_CR31","DOI":"10.1109\/ICCV.2015.152"},{"doi-asserted-by":"crossref","unstructured":"Xiao, J., Hays, J., Ehinger, K.A., Oliva, A., Torralba, A.: Sun database: large-scale scene recognition from abbey to zoo. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognitions, CVPR 2010, pp. 3485\u20133492 (2010)","key":"10_CR32","DOI":"10.1109\/CVPR.2010.5539970"},{"unstructured":"Xu, K., Ba, J., Kiros, R., Cho, K., Courville, A., Salakhutdinov, R., Zemel, R., Bengio, Y.: Show, attend and tell: neural image caption generation with visual attention. In: ICML (2015)","key":"10_CR33"},{"doi-asserted-by":"crossref","unstructured":"Yoo, D., Park, S., Lee, J.Y., Kweon, I.S.: Multi-scale pyramid pooling for deep convolutional representation. In: Computer Vision and Pattern Recognition Workshops (CVPRW) (2015)","key":"10_CR34","DOI":"10.1109\/CVPRW.2015.7301274"},{"unstructured":"Yosinski, J., Clune, J., Bengio, Y., Lipson, H.: How transferable are features in deep neural networks? In: NIPS (2014)","key":"10_CR35"},{"doi-asserted-by":"crossref","unstructured":"Zhou, B., Khosla, A., Lapedriza, A., Torralba, A., Oliva, A.: Places: an image database for deep scene understanding. arXiv preprint arXiv:1610.02055 (2016)","key":"10_CR36","DOI":"10.1167\/17.10.296"},{"unstructured":"Zhou, B., Lapedriza, A., Xiao, J., Torralba, A., Oliva, A.: Learning deep features for scene recognition using places database. In: Proceedings of the 28th Annual Conference on Neural Information Processing Systems 2014, NIPS 2014, vol. 1, pp. 487\u2013495 (2014)","key":"10_CR37"}],"container-title":["Communications in Computer and Information Science","Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-10-7305-2_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,7]],"date-time":"2024-03-07T17:22:46Z","timestamp":1709832166000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-10-7305-2_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9789811073045","9789811073052"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-981-10-7305-2_10","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"8 December 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CCCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CCF Chinese Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tianjin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 October 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 October 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cccv2017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/ccf-cccv.org\/2017\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}