{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T17:58:44Z","timestamp":1763747924698,"version":"3.40.3"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030110239"},{"type":"electronic","value":"9783030110246"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-11024-6_40","type":"book-chapter","created":{"date-parts":[[2019,1,24]],"date-time":"2019-01-24T04:29:27Z","timestamp":1548304167000},"page":"514-529","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Learning to Learn from Web Data Through Deep Semantic Embeddings"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4460-3500","authenticated-orcid":false,"given":"Raul","family":"Gomez","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1408-9803","authenticated-orcid":false,"given":"Lluis","family":"Gomez","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9723-3913","authenticated-orcid":false,"given":"Jaume","family":"Gibert","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8762-4454","authenticated-orcid":false,"given":"Dimosthenis","family":"Karatzas","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,1,23]]},"reference":[{"key":"40_CR1","doi-asserted-by":"crossref","unstructured":"Blei, D.M., Ng, A.Y., Jordan, M.I.: Latent dirichlet allocation. J. Mach. Learn. Res. (2003)","DOI":"10.7551\/mitpress\/1120.003.0082"},{"key":"40_CR2","doi-asserted-by":"crossref","unstructured":"Bojanowski, P., Grave, E., Joulin, A., Mikolov, T.: Enriching Word Vectors with Subword Information (2016)","DOI":"10.1162\/tacl_a_00051"},{"key":"40_CR3","doi-asserted-by":"crossref","unstructured":"Ding, G., Guo, Y., Zhou, J.: Collective matrix factorization hashing for multimodal data. In: Proceedings IEEE Computer Society Conference Computer Vision and Pattern Recognition (2014)","DOI":"10.1109\/CVPR.2014.267"},{"key":"40_CR4","doi-asserted-by":"crossref","unstructured":"Fu, J., Wu, Y., Mei, T., Wang, J., Lu, H., Rui, Y.: Relaxing from vocabulary: robust weakly-supervised deep learning for vocabulary-free image tagging. In: Proceedings IEEE International Conference Computer Vision and Pattern Recognition (2015)","DOI":"10.1109\/ICCV.2015.230"},{"key":"40_CR5","doi-asserted-by":"crossref","unstructured":"Gomez, L., Patel, Y., Rusi\u00f1ol, M., Karatzas, D., Jawahar, C.V.: Self-supervised learning of visual features through embedding images into text topic spaces. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.218"},{"key":"40_CR6","doi-asserted-by":"crossref","unstructured":"Gordo, A., Almazan, J., Murray, N., Perronin, F.: LEWIS: latent embeddings for word images and their semantics. In: Proceedings IEEE International Conference Computer Vision and Pattern Recognition (2015)","DOI":"10.1109\/ICCV.2015.147"},{"key":"40_CR7","doi-asserted-by":"crossref","unstructured":"Gordo, A., Larlus, D.: Beyond instance-level image retrieval: leveraging captions to learn a global visual representation for semantic retrieval. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.560"},{"key":"40_CR8","doi-asserted-by":"crossref","unstructured":"Gupta, A., Vedaldi, A., Zisserman, A.: Synthetic data for text localisation in natural images. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.254"},{"key":"40_CR9","doi-asserted-by":"crossref","unstructured":"Huiskes, M.J., Lew, M.S.: The MIR flickr retrieval evaluation. In: Proceeding 1st ACM International Conference Multimedia Information Retrieval - MIR 2008 (2008)","DOI":"10.1145\/1460096.1460104"},{"key":"40_CR10","doi-asserted-by":"crossref","unstructured":"Jia, Y., et al.: Caffe: Convolutional Architecture for Fast Feature Embedding. arXiv (2014)","DOI":"10.1145\/2647868.2654889"},{"key":"40_CR11","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., Li, F.-F.: ImageNet: a large-scale hierarchical image database. In: CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"40_CR12","unstructured":"Le, Q.V., Mikolov, T.: Distributed representations of sentences and documents. In: NIPS (2014)"},{"key":"40_CR13","doi-asserted-by":"crossref","unstructured":"Li, K., Qi, G.J., Ye, J., Hua, K.A.: Linear subspace ranking hashing for cross-modal retrieval. IEEE Trans. Pattern Anal. Mach. Intell. (2017)","DOI":"10.1109\/TPAMI.2016.2610969"},{"key":"40_CR14","unstructured":"Li, W., et al.: WebVision Challenge: Visual Learning and Understanding With Web Data (2017)"},{"key":"40_CR15","unstructured":"Li, W., Wang, L., Li, W., Agustsson, E., Van Gool, L.: WebVision Database: Visual Learning and Understanding from Web Data (2017)"},{"key":"40_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"40_CR17","doi-asserted-by":"crossref","unstructured":"Lin, Z., Ding, G., Hu, M., Wang, J.: Semantics-preserving hashing for cross-view retrieval. In: Proceedings IEEE Computer Society Conference on Computer Vision and Pattern Recognition (2015)","DOI":"10.1109\/CVPR.2015.7299011"},{"key":"40_CR18","doi-asserted-by":"crossref","unstructured":"Liu, L., Lin, Z., Shao, L., Shen, F., Ding, G., Han, J.: Sequential discrete hashing for scalable cross-modality similarity retrieval. IEEE Trans. Image Process. (2017)","DOI":"10.1109\/TIP.2016.2619262"},{"key":"40_CR19","unstructured":"Mar, J., David, V., Ger, D., Antonio, M.L.: Learning appearance in virtual scenarios for pedestrian detection. In: CVPR (2010)"},{"key":"40_CR20","doi-asserted-by":"crossref","unstructured":"Melucci, M.: Relevance feedback algorithms inspired by quantum detection. IEEE Trans. Knowl. Data Eng. (2016)","DOI":"10.1109\/TKDE.2015.2507132"},{"key":"40_CR21","unstructured":"Mikolov, T., Corrado, G., Chen, K., Dean, J.: Efficient estimation of word representations in vector space. In: ICLR (2013)"},{"key":"40_CR22","unstructured":"Norouzi, M., et al.: Zero-shot learning by convex combination of semantic embeddings. In: NIPS (2013)"},{"key":"40_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1007\/978-3-319-46604-0_29","volume-title":"Computer Vision \u2013 ECCV 2016 Workshops","author":"Y Patel","year":"2016","unstructured":"Patel, Y., Gomez, L., Rusi\u00f1ol, M., Karatzas, D.: Dynamic lexicon generation for natural scene images. In: Hua, G., J\u00e9gou, H. (eds.) ECCV 2016. LNCS, vol. 9913, pp. 395\u2013410. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46604-0_29"},{"key":"40_CR24","doi-asserted-by":"crossref","unstructured":"Patrini, G., Rozza, A., Menon, A., Nock, R., Qu, L.: Making deep neural networks robust to label noise: a loss correction approach. In: CVPR (2016)","DOI":"10.1109\/CVPR.2017.240"},{"key":"40_CR25","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.: GloVe: global vectors for word representation. In: EMNLP (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"40_CR26","doi-asserted-by":"crossref","unstructured":"Phan, T.Q., Shivakumara, P., Tian, S., Tan, C.L.: Recognizing text with perspective distortion in natural scenes. In: Proceedings IEEE Computer Society Conference on Computer Vision and Pattern Recognition (2013)","DOI":"10.1109\/ICCV.2013.76"},{"key":"40_CR27","unstructured":"Princeton University: WordNet (2010). http:\/\/wordnet.princeton.edu\/"},{"key":"40_CR28","doi-asserted-by":"crossref","unstructured":"Ros, G., Sellart, L., Materzynska, J., Vazquez, D., Lopez, A.M.: The SYNTHIA dataset: a large collection of synthetic images for semantic segmentation of urban scenes. In: 2016 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (2016)","DOI":"10.1109\/CVPR.2016.352"},{"key":"40_CR29","doi-asserted-by":"crossref","unstructured":"Salvador, A., et al.: Learning cross-modal embeddings for cooking recipes and food images. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.327"},{"key":"40_CR30","doi-asserted-by":"crossref","unstructured":"Szegedy, C., et al.: Going deeper with convolutions. In: Proceedings IEEE Computer Society Conference on Computer Vision and Pattern Recognition (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"40_CR31","doi-asserted-by":"crossref","unstructured":"Wang, J., Li, G.: A multi-modal hashing learning framework for automatic image annotation. In: 2017 IEEE Second International Conference on Data Science in Cyberspace (2017)","DOI":"10.1109\/DSC.2017.48"},{"key":"40_CR32","doi-asserted-by":"crossref","unstructured":"Wang, L., Li, Y., Lazebnik, S.: Learning deep structure-preserving image-text embeddings. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.541"},{"key":"40_CR33","unstructured":"Xiao, T., Xia, T., Yang, Y., Huang, C., Wang, X.: Learning from massive noisy labeled data for image classification. In: Proceedings IEEE Computer Society Conference on Computer Vision and Pattern Recognition (2015)"},{"key":"40_CR34","doi-asserted-by":"crossref","unstructured":"Xu, X., He, L., Lu, H., Shimada, A., Taniguchi, R.I.: Non-linear matrix completion for social image tagging. IEEE Access (2017)","DOI":"10.1109\/ACCESS.2016.2624267"},{"key":"40_CR35","doi-asserted-by":"crossref","unstructured":"Xu, X., Shen, F., Yang, Y., Shen, H.T., Li, X.: Learning discriminative binary codes for large-scale cross-modal retrieval. IEEE Trans. Image Process. (2017)","DOI":"10.1109\/TIP.2017.2676345"},{"key":"40_CR36","unstructured":"Yosinski, J., Clune, J., Bengio, Y., Lipson, H.: How transferable are features in deep neural networks? In: NIPS (2014)"},{"key":"40_CR37","doi-asserted-by":"crossref","unstructured":"Zhang, D., Li, W.J.: Large-scale supervised multimodal hashing with semantic correlation maximization. In: AAAI, pp. 2177\u20132183 (2014)","DOI":"10.1609\/aaai.v28i1.8995"},{"key":"40_CR38","doi-asserted-by":"crossref","unstructured":"Zhang, X., Zhang, X., Li, X., Li, Z., Wang, S.: Classify social image by integrating multi-modal content. Multimed. Tools Appl. (2018)","DOI":"10.1007\/s11042-017-4657-2"},{"key":"40_CR39","unstructured":"Zhen, Y., Yeung, D.Y.: Co-regularized hashing for multimodal data. In: Advances in Neural Information Processing Systems, pp. 1385\u20131393 (2012)"},{"key":"40_CR40","doi-asserted-by":"crossref","unstructured":"Zhou, B., Lapedriza, A., Khosla, A., Oliva, A., Torralba, A.: Places: a 10 million image database for scene recognition. IEEE Trans. Pattern Anal. Mach. Intell. (2017)","DOI":"10.1167\/17.10.296"},{"key":"40_CR41","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"519","DOI":"10.1007\/978-3-319-10578-9_34","volume-title":"Computer Vision \u2013 ECCV 2014","author":"B Zhou","year":"2014","unstructured":"Zhou, B., Liu, L., Oliva, A., Torralba, A.: Recognizing city identity via attribute analysis of geo-tagged images. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8691, pp. 519\u2013534. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10578-9_34"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-11024-6_40","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,14]],"date-time":"2024-07-14T08:39:58Z","timestamp":1720946398000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-11024-6_40"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030110239","9783030110246"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-11024-6_40","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"23 January 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}