{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T17:26:33Z","timestamp":1743096393159,"version":"3.40.3"},"publisher-location":"Cham","reference-count":47,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030585167"},{"type":"electronic","value":"9783030585174"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58517-4_38","type":"book-chapter","created":{"date-parts":[[2020,10,9]],"date-time":"2020-10-09T19:03:11Z","timestamp":1602270191000},"page":"649-665","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Location Sensitive Image Retrieval and Tagging"],"prefix":"10.1007","author":[{"given":"Raul","family":"Gomez","sequence":"first","affiliation":[]},{"given":"Jaume","family":"Gibert","sequence":"additional","affiliation":[]},{"given":"Lluis","family":"Gomez","sequence":"additional","affiliation":[]},{"given":"Dimosthenis","family":"Karatzas","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,10,10]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Chen, D.M., et al.: City-scale landmark identification on mobile devices. In: CVPR (2011)","key":"38_CR1","DOI":"10.1109\/CVPR.2011.5995610"},{"doi-asserted-by":"crossref","unstructured":"Chu, G., et al.: Geo-aware networks for fine-grained recognition. In: ICCVW (2019)","key":"38_CR2","DOI":"10.1109\/ICCVW.2019.00033"},{"unstructured":"Frome, A., Corrado, G.S., Shlens, J., Bengio Jeffrey Dean, S., Ranzato, A., Mikolov, T.: DeViSE: a deep visual-semantic embedding model. In: NIPS (2013)","key":"38_CR3"},{"doi-asserted-by":"crossref","unstructured":"Gallagher, A., Joshi, D., Yu, J., Luo, J.: Geo-location inference from image content and user tags. In: CVPR (2009)","key":"38_CR4","DOI":"10.1109\/CVPR.2009.5204168"},{"key":"38_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"485","DOI":"10.1007\/978-3-030-01246-5_29","volume-title":"Computer Vision \u2013 ECCV 2018","author":"P Gao","year":"2018","unstructured":"Gao, P., et al.: Question-guided hybrid convolution for visual question answering. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11205, pp. 485\u2013501. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01246-5_29"},{"doi-asserted-by":"crossref","unstructured":"Gomez, L., Patel, Y., Rusi\u00f1ol, M., Karatzas, D., Jawahar, C.V.: Self-supervised learning of visual features through embedding images into text topic spaces. In: CVPR (2017)","key":"38_CR6","DOI":"10.1109\/CVPR.2017.218"},{"doi-asserted-by":"crossref","unstructured":"Gordo, A., Larlus, D.: Beyond instance-level image retrieval: leveraging captions to learn a global visual representation for semantic retrieval. In: CVPR (2017)","key":"38_CR7","DOI":"10.1109\/CVPR.2017.560"},{"doi-asserted-by":"crossref","unstructured":"Hays, J., Efros, A.A.: IM2GPS: estimating geographic information from a single image. In: CVPR (2008)","key":"38_CR8","DOI":"10.1109\/CVPR.2008.4587784"},{"doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR (2016)","key":"38_CR9","DOI":"10.1109\/CVPR.2016.90"},{"issue":"2","key":"38_CR10","doi-asserted-by":"publisher","first-page":"430","DOI":"10.1109\/TMM.2016.2614861","volume":"19","author":"L Herranz","year":"2017","unstructured":"Herranz, L., Jiang, S., Xu, R.: Modeling restaurant context for food recognition. IEEE Trans. Multimedia 19(2), 430\u2013440 (2017)","journal-title":"IEEE Trans. Multimedia"},{"unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. arXiv (2015)","key":"38_CR11"},{"doi-asserted-by":"crossref","unstructured":"Jabri, A., Joulin, A., Van Der Maaten, L.: Revisiting visual question answering baselines. Technical report, Facebook AI Research (2016)","key":"38_CR12","DOI":"10.1007\/978-3-319-46484-8_44"},{"doi-asserted-by":"crossref","unstructured":"Kennedy, L., Naaman, M.: Generating diverse and representative image search results for landmarks. In: International Conference on World Wide Web (2008)","key":"38_CR13","DOI":"10.1145\/1367497.1367539"},{"doi-asserted-by":"crossref","unstructured":"Kennedy, L., Naaman, M., Ahern, S., Nair, R., Rattenbury, T.: How flickr helps us make sense of the world: context and content in community-contributed media collections. In: ACM International Conference on Multimedia (2007)","key":"38_CR14","DOI":"10.1145\/1291233.1291384"},{"unstructured":"Kiros, R., Salakhutdinov, R., Zemel, R.S.: Unifying visual-semantic embeddings with multimodal neural language models. arXiv (2014)","key":"38_CR15"},{"doi-asserted-by":"crossref","unstructured":"Kumar, A., Tardif, J.P., Anati, R., Daniilidis, K.: Experiments on visual loop closing using vocabulary trees. In: CVPR Workshops (2008)","key":"38_CR16","DOI":"10.1109\/CVPRW.2008.4563140"},{"issue":"3","key":"38_CR17","doi-asserted-by":"publisher","first-page":"588","DOI":"10.1109\/TMM.2014.2302732","volume":"16","author":"J Liu","year":"2014","unstructured":"Liu, J., Li, Z., Tang, J., Jiang, Y., Lu, H.: Personalized geo-specific tag recommendation for photos on social websites. IEEE Trans. Multimedia 16(3), 588\u2013600 (2014)","journal-title":"IEEE Trans. Multimedia"},{"doi-asserted-by":"crossref","unstructured":"Mac Aodha, O., Cole, E., Perona, P.: Presence-only geographical priors for fine-grained image classification. In: ICCV (2019)","key":"38_CR18","DOI":"10.1109\/ICCV.2019.00969"},{"key":"38_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1007\/978-3-030-01216-8_12","volume-title":"Computer Vision \u2013 ECCV 2018","author":"D Mahajan","year":"2018","unstructured":"Mahajan, D., et al.: Exploring the limits of weakly supervised pretraining. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11206, pp. 185\u2013201. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01216-8_12"},{"key":"38_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"656","DOI":"10.1007\/978-3-030-01252-6_39","volume-title":"Computer Vision \u2013 ECCV 2018","author":"E Margffoy-Tuay","year":"2018","unstructured":"Margffoy-Tuay, E., P\u00e9rez, J.C., Botero, E., Arbel\u00e1ez, P.: Dynamic multimodal instance segmentation guided by natural language queries. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11215, pp. 656\u2013672. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01252-6_39"},{"unstructured":"Mikolov, T., Corrado, G., Chen, K., Dean, J.: Efficient estimation of word representations in vector space. In: ICLR (2013)","key":"38_CR21"},{"doi-asserted-by":"crossref","unstructured":"Moxley, E., Kleban, J., Manjunath, B.: SpiritTagger: a geo-aware tag suggestion tool mined from flickr. In: Proceedings of ACM ICMIR (2008)","key":"38_CR22","DOI":"10.1145\/1460096.1460102"},{"key":"38_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"575","DOI":"10.1007\/978-3-030-01258-8_35","volume-title":"Computer Vision \u2013 ECCV 2018","author":"E M\u00fcller-Budack","year":"2018","unstructured":"M\u00fcller-Budack, E., Pustu-Iren, K., Ewerth, R.: Geolocation estimation of photos using a hierarchical model and scene classification. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11216, pp. 575\u2013592. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01258-8_35"},{"doi-asserted-by":"crossref","unstructured":"O\u2019Hare, N., Gurrin, C., Jones, G.J., Smeaton, A.F.: Combination of content analysis and context features for digital photograph retrieval. In: European Workshop on the Integration of Knowledge, Semantics and Digital Media Technology (2005)","key":"38_CR24","DOI":"10.1049\/ic.2005.0750"},{"doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.: Glove: global vectors for word representation. In: EMNLP (2014)","key":"38_CR25","DOI":"10.3115\/v1\/D14-1162"},{"doi-asserted-by":"crossref","unstructured":"Rajiv Jain, C.W.: Multimodal document image classification. In: ICDAR (2019)","key":"38_CR26","DOI":"10.1109\/ICDAR.2019.00021"},{"doi-asserted-by":"crossref","unstructured":"Rattenbury, T., Good, N., Naaman, M.: Towards automatic extraction of event and place semantics from flickr tags. In: ACM SIGIR Conference on Research and Development in Information Retrieval (2007)","key":"38_CR27","DOI":"10.1145\/1277741.1277762"},{"doi-asserted-by":"crossref","unstructured":"Ren, Z., Jin, H., Lin, Z., Fang, C., Yuille, A.: Joint image-text representation by Gaussian visual-semantic embedding. In: ACM Multimedia (2016)","key":"38_CR28","DOI":"10.1145\/2964284.2967212"},{"doi-asserted-by":"crossref","unstructured":"Rohrbach, A., Rohrbach, M., Hu, R., Darrell, T., Schiele, B.: Grounding of textual phrases in images by reconstruction. Technical report, Max Planck Institute for Informatics (2015)","key":"38_CR29","DOI":"10.1007\/978-3-319-46448-0_49"},{"doi-asserted-by":"crossref","unstructured":"Salvador, A., et al.: Learning cross-modal embeddings for cooking recipes and food images. In: CVPR (2017)","key":"38_CR30","DOI":"10.1109\/CVPR.2017.327"},{"issue":"1","key":"38_CR31","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava, N., Hinton, G., Krizhevsky, A., Salakhutdinov, R.: Dropout: a simple way to prevent neural networks from overfitting. J. Mach. Learn. Res. 15(1), 1929\u20131958 (2014)","journal-title":"J. Mach. Learn. Res."},{"doi-asserted-by":"crossref","unstructured":"Tang, K., Paluri, M., Fei-Fei, L., Fergus, R., Bourdev, L.: Improving image classification with location context. In: ICCV (2015)","key":"38_CR32","DOI":"10.1109\/ICCV.2015.121"},{"issue":"2","key":"38_CR33","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1145\/2812802","volume":"59","author":"B Thomee","year":"2015","unstructured":"Thomee, B., et al.: YFCC100M: the new data in multimedia research. Commun. ACM 59(2), 64\u201373 (2015)","journal-title":"Commun. ACM"},{"doi-asserted-by":"crossref","unstructured":"Veit, A., Nickel, M., Belongie, S., Maaten, L.V.D.: Separating self-expression and visual content in hashtag supervision. In: CVPR (2018)","key":"38_CR34","DOI":"10.1109\/CVPR.2018.00620"},{"doi-asserted-by":"crossref","unstructured":"Vo, N., Jacobs, N., Hays, J.: Revisiting IM2GPS in the deep learning era. In: ICCV (2017)","key":"38_CR35","DOI":"10.1109\/ICCV.2017.286"},{"doi-asserted-by":"crossref","unstructured":"Vo, N., et al.: Composing text and image for image retrieval - an empirical odyssey. In: CVPR (2019)","key":"38_CR36","DOI":"10.1109\/CVPR.2019.00660"},{"unstructured":"Wang, L., Li, Y., Huang, J., Lazebnik, S.: Learning two-branch neural networks for image-text matching tasks. In: CVPR (2017)","key":"38_CR37"},{"unstructured":"Wang, T., Wu, D.J., Coates, A., Ng, A.Y.: End-to-end text recognition with convolutional neural networks. In: ICPR (2012)","key":"38_CR38"},{"key":"38_CR39","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1007\/978-3-319-46484-8_3","volume-title":"Computer Vision \u2013 ECCV 2016","author":"T Weyand","year":"2016","unstructured":"Weyand, T., Kostrikov, I., Philbin, J.: PlaNet - photo geolocation with convolutional neural networks. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 37\u201355. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_3"},{"doi-asserted-by":"crossref","unstructured":"Wu, B., Chen, W., Sun, P., Liu, W., Ghanem, B., Lyu, S.: Tagging like humans: diverse and distinct image annotation. In: CVPR (2018)","key":"38_CR40","DOI":"10.1109\/CVPR.2017.656"},{"doi-asserted-by":"crossref","unstructured":"Wu, B., Jia, F., Liu, W., Ghanem, B.: Diverse image annotation. In: CVPR (2017)","key":"38_CR41","DOI":"10.1109\/CVPR.2017.656"},{"issue":"3","key":"38_CR42","doi-asserted-by":"crossref","first-page":"716","DOI":"10.1109\/TPAMI.2012.124","volume":"35","author":"L Wu","year":"2012","unstructured":"Wu, L., Jin, R., Jain, A.K.: Tag completion for image retrieval. IEEE Trans. Pattern Anal. Mach. Intell. 35(3), 716\u2013727 (2012)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"3","key":"38_CR43","doi-asserted-by":"publisher","first-page":"742","DOI":"10.1007\/s11263-019-01198-w","volume":"128","author":"Y Wu","year":"2019","unstructured":"Wu, Y., He, K.: Group normalization. Int. J. Comput. Vis. 128(3), 742\u2013755 (2019). https:\/\/doi.org\/10.1007\/s11263-019-01198-w","journal-title":"Int. J. Comput. Vis."},{"issue":"8","key":"38_CR44","doi-asserted-by":"publisher","first-page":"1187","DOI":"10.1109\/TMM.2015.2438717","volume":"17","author":"R Xu","year":"2015","unstructured":"Xu, R., Herranz, L., Jiang, S., Wang, S., Song, X., Jain, R.: Geolocalized modeling for dish recognition. IEEE Trans. Multimedia 17(8), 1187\u20131199 (2015)","journal-title":"IEEE Trans. Multimedia"},{"doi-asserted-by":"crossref","unstructured":"Yang, F., et al.: Exploring deep multimodal fusion of text and photo for hate speech classification. In: Workshop on Abusive Language Online (2019)","key":"38_CR45","DOI":"10.18653\/v1\/W19-3502"},{"doi-asserted-by":"crossref","unstructured":"Yuan, J., Luo, J., Kautz, H., Wu, Y.: Mining GPS traces and visual words for event classification. Technical report, Northwestern University (2008)","key":"38_CR46","DOI":"10.1145\/1460096.1460099"},{"issue":"3","key":"38_CR47","first-page":"1","volume":"8","author":"J Zhang","year":"2017","unstructured":"Zhang, J., Wang, S., Huang, Q.: Location-based parallel tag completion for geo-tagged social image retrieval general terms. ACM Trans. Intell. Syst. Technol. 8(3), 1\u201321 (2017)","journal-title":"ACM Trans. Intell. Syst. Technol."}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58517-4_38","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,9]],"date-time":"2024-10-09T00:17:08Z","timestamp":1728433028000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58517-4_38"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585167","9783030585174"],"references-count":47,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58517-4_38","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"10 October 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic. From the ECCV Workshops 249 full papers, 18 short papers, and 21 further contributions were published out of a total of 467 submissions.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}