{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T20:40:09Z","timestamp":1750884009906,"version":"3.41.0"},"publisher-location":"Cham","reference-count":40,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319681542"},{"type":"electronic","value":"9783319681559"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-68155-9_20","type":"book-chapter","created":{"date-parts":[[2017,9,19]],"date-time":"2017-09-19T02:25:08Z","timestamp":1505787908000},"page":"261-272","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Deep Semantic Indexing Using Convolutional Localization Network with Region-Based Visual Attention for Image Database"],"prefix":"10.1007","author":[{"given":"Mingxing","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Yang","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Hanwang","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yanli","family":"Ji","sequence":"additional","affiliation":[]},{"given":"Ning","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Heng Tao","family":"Shen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,9,20]]},"reference":[{"key":"20_CR1","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. arXiv (2014)"},{"issue":"6","key":"20_CR2","first-page":"71","volume":"6","author":"A Bhuiyan","year":"2015","unstructured":"Bhuiyan, A.: Content-based image retrieval for image indexing. IJACSA 6(6), 71\u201379 (2015)","journal-title":"IJACSA"},{"key":"20_CR3","doi-asserted-by":"crossref","unstructured":"Bin, Y., Yang, Y., Shen, F., Xu, X., Shen, H.T.: Bidirectional long-short term memory for video description. In: ACM MM, pp. 436\u2013440. ACM (2016)","DOI":"10.1145\/2964284.2967258"},{"key":"20_CR4","unstructured":"Bhuiyan, A., Chen, X., Zitnick, C.L.: Learning a recurrent visual representation for image caption generation. arXiv (2014)"},{"key":"20_CR5","unstructured":"Chiueh, T.-C.: Content-based image indexing. In: VLDB, vol. 94, pp. 582\u2013593 (1994)"},{"key":"20_CR6","doi-asserted-by":"crossref","unstructured":"Donahue, J., Hendricks, L.A., Guadarrama, S., Rohrbach, M., Venugopalan, S., Saenko, K., Darrell, T.: Long-term recurrent convolutional networks for visual recognition and description. In: CVPR, pp. 2625\u20132634 (2015)","DOI":"10.21236\/ADA623249"},{"issue":"1","key":"20_CR7","first-page":"3","volume":"2","author":"MD Ewald","year":"2009","unstructured":"Ewald, M.D.: Content-based image indexing and retrieval in an image database for technical domains. Trans. MLDM 2(1), 3\u201322 (2009)","journal-title":"Trans. MLDM"},{"key":"20_CR8","doi-asserted-by":"crossref","unstructured":"Fang, H., Gupta, S., Iandola, F., Srivastava, R.K., Deng, L., Doll\u00e1r, P., Gao, J., He, X., Mitchell, M., Platt, J.C., et al.: From captions to visual concepts and back. In: CVPR, pp. 1473\u20131482 (2015)","DOI":"10.1109\/CVPR.2015.7298754"},{"issue":"9","key":"20_CR9","doi-asserted-by":"publisher","first-page":"1627","DOI":"10.1109\/TPAMI.2009.167","volume":"32","author":"PF Felzenszwalb","year":"2010","unstructured":"Felzenszwalb, P.F., Girshick, R.B., McAllester, D., Ramanan, D.: Object detection with discriminatively trained part-based models. TPAMI 32(9), 1627\u20131645 (2010)","journal-title":"TPAMI"},{"key":"20_CR10","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast r-cnn. In: ICCV, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"20_CR11","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: CVPR, pp. 580\u2013587 (2014)","DOI":"10.1109\/CVPR.2014.81"},{"issue":"8","key":"20_CR12","first-page":"1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. NC 9(8), 1735\u20131780 (1997)","journal-title":"NC"},{"issue":"4","key":"20_CR13","doi-asserted-by":"publisher","first-page":"814","DOI":"10.1109\/TPAMI.2015.2465908","volume":"38","author":"J Hosang","year":"2016","unstructured":"Hosang, J., Benenson, R., Dollr, P., Schiele, B.: What makes for effective detection proposals? TPAMI 38(4), 814\u2013830 (2016)","journal-title":"TPAMI"},{"key":"20_CR14","unstructured":"Hyv\u00f6nen, E., Saarela, S., Styrman, A., Viljanen, K.: Ontology-based image retrieval. In: WWW (2003)"},{"key":"20_CR15","doi-asserted-by":"crossref","unstructured":"Karpathy, A., Li, F.-F.: Deep visual-semantic alignments for generating image descriptions. In: CVPR, pp. 3128\u20133137 (2015)","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"20_CR16","unstructured":"Kiros, R., Salakhutdinov, R., Zemel, R.S.: Multimodal neural language models. In: ICML, vol. 14, pp. 595\u2013603 (2014)"},{"key":"20_CR17","unstructured":"Kiros, R., Salakhutdinov, R., Zemel, R.S.: Unifying visual-semantic embeddings with multimodal neural language models. arXiv (2014)"},{"key":"20_CR18","series-title":"Synthese Library (Studies in Epistemology, Logic, Methodology, and Philosophy of Science)","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1007\/978-94-009-3833-5_5","volume-title":"Matters of Intelligence","author":"C Koch","year":"1987","unstructured":"Koch, C., Ullman, S.: Shifts in selective visual attention: towards the underlying neural circuitry. In: Vaina, L.M. (ed.) MI. SYLI, vol. 188, pp. 115\u2013141. Springer, Dordrecht (1987). doi:10.1007\/978-94-009-3833-5_5"},{"key":"20_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). doi:10.1007\/978-3-319-10602-1_48"},{"key":"20_CR20","doi-asserted-by":"crossref","unstructured":"Luong, M.-T., Pham, H., Manning, C.D.: Effective approaches to attention-based neural machine translation. arXiv (2015)","DOI":"10.18653\/v1\/D15-1166"},{"key":"20_CR21","unstructured":"Mao, J., Xu, W., Yang, Y., Wang, J., Huang, Z., Yuille, A.: Deep captioning with multimodal recurrent neural networks (m-rnn). arXiv (2014)"},{"key":"20_CR22","unstructured":"Michel, F.: How many photos are uploaded to flickr every day and month? (2012). http:\/\/www.flickr.com\/photos\/franckmichel\/6855169886\/"},{"key":"20_CR23","unstructured":"Neubig, G.: Neural machine translation and sequence-to-sequence models: a tutorial. arXiv (2017)"},{"issue":"1","key":"20_CR24","first-page":"77","volume":"25","author":"TV Nguyen","year":"2015","unstructured":"Nguyen, T.V., Song, Z., Yan, S.: Stap: spatial-temporal attention-aware pooling for action recognition. TCSVT 25(1), 77\u201386 (2015)","journal-title":"TCSVT"},{"key":"20_CR25","doi-asserted-by":"crossref","unstructured":"Plummer, B.A., Wang, L., Cervantes, C.M., Caicedo, J.C., Hockenmaier, J., Lazebnik, S.: Flickr30k entities: collecting region-to-phrase correspondences for richer image-to-sentence models. In: ICCV, pp. 2641\u20132649 (2015)","DOI":"10.1109\/ICCV.2015.303"},{"key":"20_CR26","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: towards real-time object detection with region proposal networks. In: NIPS, pp. 91\u201399 (2015)"},{"key":"20_CR27","unstructured":"Sharma, S., Kiros, R., Salakhutdinov, R.: Action recognition using visual attention. arXiv (2015)"},{"key":"20_CR28","unstructured":"Sutskever, I., Vinyals, O., Le, Q.V.: Sequence to sequence learning with neural networks. In: NIPS, pp. 3104\u20133112 (2014)"},{"issue":"2","key":"20_CR29","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/s11263-013-0620-5","volume":"104","author":"JRR Uijlings","year":"2013","unstructured":"Uijlings, J.R.R., van de Sande, K.E.A., Gevers, T., Smeulders, A.W.M.: Selective search for object recognition. IJCV 104(2), 154\u2013171 (2013)","journal-title":"IJCV"},{"issue":"1","key":"20_CR30","first-page":"117","volume":"10","author":"A Vailaya","year":"2001","unstructured":"Vailaya, A., Figueiredo, M.A.T., Jain, A.K., Zhang, H.-J.: Image classification for content-based indexing. TIP 10(1), 117\u2013130 (2001)","journal-title":"TIP"},{"key":"20_CR31","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., Erhan, D.: Show and tell: a neural image caption generator. In: CVPR, pp. 3156\u20133164 (2015)","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"20_CR32","doi-asserted-by":"crossref","unstructured":"Viola, P., Jones, M.: Rapid object detection using a boosted cascade of simple features. In: CVPR, vol. 1, p. I-511. IEEE (2001)","DOI":"10.1109\/CVPR.2001.990517"},{"key":"20_CR33","doi-asserted-by":"crossref","unstructured":"Wang, C., Yang, H., Bartz, C., Meinel, C.: Image captioning with deep bidirectional LSTMs. In: ACM MM, pp. 988\u2013997. ACM (2016)","DOI":"10.1145\/2964284.2964299"},{"issue":"1","key":"20_CR34","first-page":"97","volume":"26","author":"W Xindong","year":"2014","unstructured":"Xindong, W., Zhu, X., Gongqing, W., Ding, W.: Data mining with big data. TKDE 26(1), 97\u2013107 (2014)","journal-title":"TKDE"},{"key":"20_CR35","unstructured":"Xu, K., Ba, J., Kiros, R., Cho, K., Courville, A., Salakhutdinov, R., Zemel, R.S., Bengio, Y.: Show, attend and tell: neural image caption generation with visual attention. arXiv 2(3), 5 (2015)"},{"key":"20_CR36","doi-asserted-by":"crossref","unstructured":"Yanai, K.: Image collector: an image-gathering system from the world-wide web employing keyword-based search engines. In: ICME (2001)","DOI":"10.1109\/ICME.2001.1237772"},{"key":"20_CR37","doi-asserted-by":"crossref","unstructured":"Yang, Y., Luo, Y., Chen, W., Shen, F., Shao, J., Shen, H.T.: Zero-shot hashing via transferring supervised knowledge. In: ACM MM, pp. 1286\u20131295. ACM (2016)","DOI":"10.1145\/2964284.2964319"},{"issue":"6","key":"20_CR38","first-page":"1677","volume":"16","author":"Y Yang","year":"2014","unstructured":"Yang, Y., Zha, Z.-J., Gao, Y., Zhu, X., Chua, T.-S.: Exploiting web images for semantic video indexing via robust sample-specific loss. TMM 16(6), 1677\u20131689 (2014)","journal-title":"TMM"},{"key":"20_CR39","doi-asserted-by":"crossref","unstructured":"Yao, L., Torabi, A., Cho, K., Ballas, N., Pal, C., Larochelle, H., Courville, A.: Describing videos by exploiting temporal structure. In: ICCV, pp. 4507\u20134515 (2015)","DOI":"10.1109\/ICCV.2015.512"},{"key":"20_CR40","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1007\/978-3-319-10602-1_26","volume-title":"Computer Vision \u2013 ECCV 2014","author":"CL Zitnick","year":"2014","unstructured":"Zitnick, C.L., Doll\u00e1r, P.: Edge boxes: locating object proposals from edges. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 391\u2013405. Springer, Cham (2014). doi:10.1007\/978-3-319-10602-1_26"}],"container-title":["Lecture Notes in Computer Science","Databases Theory and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-68155-9_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T20:08:50Z","timestamp":1750882130000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-68155-9_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319681542","9783319681559"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-68155-9_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"20 September 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ADC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australasian Database Conference","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brisbane","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 September 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 September 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"adc2017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/adc-conferences.org.au\/adc2017\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}