{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T09:54:15Z","timestamp":1756893255228,"version":"3.40.3"},"publisher-location":"Cham","reference-count":65,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030585761"},{"type":"electronic","value":"9783030585778"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58577-8_13","type":"book-chapter","created":{"date-parts":[[2020,9,23]],"date-time":"2020-09-23T14:04:27Z","timestamp":1600869867000},"page":"205-221","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Learning Surrogates via Deep Embedding"],"prefix":"10.1007","author":[{"given":"Yash","family":"Patel","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tom\u00e1\u0161","family":"Hoda\u0148","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ji\u0159\u00ed","family":"Matas","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,9,24]]},"reference":[{"key":"13_CR1","unstructured":"Agustsson, E., et al.: Soft-to-hard vector quantization for end-to-end learning compressible representations. In: NeurIPS (2017)"},{"key":"13_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1007\/978-3-030-20893-6_10","volume-title":"Computer Vision \u2013 ACCV 2018","author":"SM Azimi","year":"2019","unstructured":"Azimi, S.M., Vig, E., Bahmanyar, R., K\u00f6rner, M., Reinartz, P.: Towards multi-class object detection in unconstrained remote sensing imagery. In: Jawahar, C.V., Li, H., Mori, G., Schindler, K. (eds.) ACCV 2018. LNCS, vol. 11363, pp. 150\u2013165. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-20893-6_10"},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Baek, J., et al.: What is wrong with scene text recognition model comparisons? dataset and model analysis. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00481"},{"key":"13_CR4","unstructured":"Ball\u00e9, J., Minnen, D., Singh, S., Hwang, S.J., Johnston, N.: Variational image compression with a scale hyperprior. In: ICLR (2018)"},{"key":"13_CR5","unstructured":"Berrada, L., Zisserman, A., Kumar, M.P.: Smooth loss functions for deep top-k classification. In: ICLR (2018)"},{"key":"13_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1007\/978-3-030-21074-8_11","volume-title":"Computer Vision \u2013 ACCV 2018 Workshops","author":"M Bu\u0161ta","year":"2019","unstructured":"Bu\u0161ta, M., Patel, Y., Matas, J.: E2E-MLT \u2013 an unconstrained end-to-end method for multi-language scene text. In: Carneiro, G., You, S. (eds.) ACCV 2018. LNCS, vol. 11367, pp. 127\u2013143. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-21074-8_11"},{"key":"13_CR7","doi-asserted-by":"crossref","unstructured":"Caron, M., Bojanowski, P., Joulin, A., Douze, M.: Deep clustering for unsupervised learning of visual features. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01264-9_9"},{"key":"13_CR8","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"2","key":"13_CR9","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1109\/TPAMI.2015.2439281","volume":"38","author":"C Dong","year":"2015","unstructured":"Dong, C., Loy, C.C., He, K., Tang, X.: Image super-resolution using deep convolutional networks. IEEE Trans. Pattern Anal. Mach. Intell. 38(2), 295\u2013307 (2015)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"13_CR10","doi-asserted-by":"crossref","unstructured":"Elsken, T., Metzen, J.H., Hutter, F.: Neural architecture search: a survey. arXiv preprint arXiv:1808.05377 (2018)","DOI":"10.1007\/978-3-030-05318-5_3"},{"key":"13_CR11","doi-asserted-by":"crossref","unstructured":"Engilberge, M., Chevallier, L., P\u00e9rez, P., Cord, M.: Sodeep: a sorting deep net to learn ranking loss surrogates. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01105"},{"key":"13_CR12","unstructured":"Gidaris, S., Singh, P., Komodakis, N.: Unsupervised representation learning by predicting image rotations. In: ICLR (2018)"},{"key":"13_CR13","unstructured":"Glorot, X., Bordes, A., Bengio, Y.: Deep sparse rectifier neural networks. In: AISTATS (2011)"},{"key":"13_CR14","doi-asserted-by":"crossref","unstructured":"Gomez, L., Patel, Y., Rusi\u00f1ol, M., Karatzas, D., Jawahar, C.: Self-supervised learning of visual features through embedding images into text topic spaces. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.218"},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"Gomez, R., et al.: ICDAR2017 robust reading challenge on coco-text. In: ICDAR (2017)","DOI":"10.1109\/ICDAR.2017.234"},{"key":"13_CR16","unstructured":"Grabocka, J., Scholz, R., Schmidt-Thieme, L.: Learning surrogate losses. arXiv preprint arXiv:1905.10108 (2019)"},{"key":"13_CR17","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: ICML (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"13_CR18","unstructured":"Gulrajani, I., Ahmed, F., Arjovsky, M., Dumoulin, V., Courville, A.C.: Improved training of Wasserstein GANs. In: NeurIPS (2017)"},{"key":"13_CR19","doi-asserted-by":"crossref","unstructured":"Gupta, A., Vedaldi, A., Zisserman, A.: Synthetic data for text localisation in natural images. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.254"},{"key":"13_CR20","unstructured":"Hazan, T., Keshet, J., McAllester, D.A.: Direct loss minimization for structured prediction. In: NeurIPS (2010)"},{"key":"13_CR21","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"8","key":"13_CR22","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"13_CR23","doi-asserted-by":"crossref","unstructured":"Hodan, T., et al.: Bop: benchmark for 6D object pose estimation. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01249-6_2"},{"key":"13_CR24","unstructured":"Jaderberg, M., Simonyan, K., Vedaldi, A., Zisserman, A.: Synthetic data and artificial neural networks for natural scene text recognition. CoRR (2014)"},{"key":"13_CR25","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., Kavukcuoglu, K.: Spatial transformer networks. In: NeurIPS (2015)"},{"key":"13_CR26","doi-asserted-by":"crossref","unstructured":"Karatzas, D., et al.: ICDAR 2015 competition on robust reading. In: ICDAR (2015)","DOI":"10.1109\/ICDAR.2015.7333942"},{"key":"13_CR27","doi-asserted-by":"crossref","unstructured":"Karatzas, D., et al.: ICDAR 2013 robust reading competition. In: ICDAR (2013)","DOI":"10.1109\/ICDAR.2013.221"},{"key":"13_CR28","doi-asserted-by":"crossref","unstructured":"Kato, H., Ushiku, Y., Harada, T.: Neural 3D mesh renderer. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00411"},{"key":"13_CR29","unstructured":"Kristan, M., et al.: The seventh visual object tracking vot2019 challenge results. In: ICCV Workshops (2019)"},{"key":"13_CR30","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: NeurIPS (2012)"},{"key":"13_CR31","doi-asserted-by":"crossref","unstructured":"Lapin, M., Hein, M., Schiele, B.: Loss functions for top-k error: analysis and insights. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.163"},{"key":"13_CR32","doi-asserted-by":"crossref","unstructured":"Ledig, C., et al.: Photo-realistic single image super-resolution using a generative adversarial network. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.19"},{"key":"13_CR33","unstructured":"Lee, J., Cho, S., Beack, S.K.: Context-adaptive entropy model for end-to-end optimized image compression. In: ICLR (2019)"},{"key":"13_CR34","unstructured":"Li, K., Malik, J.: Learning to optimize neural nets. arXiv preprint arXiv:1703.00441 (2017)"},{"key":"13_CR35","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"13_CR36","doi-asserted-by":"crossref","unstructured":"Liu, W., Chen, C., Wong, K.K., Su, Z., Han, J.: Star-net: a spatial attention residue network for scene text recognition. In: BMVC (2016)","DOI":"10.5244\/C.30.43"},{"key":"13_CR37","doi-asserted-by":"crossref","unstructured":"Liu, X., Liang, D., Yan, S., Chen, D., Qiao, Y., Yan, J.: FOTS: fast oriented text spotting with a unified network. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00595"},{"key":"13_CR38","unstructured":"Lucas, S.M., Panaretos, A., Sosa, L., Tang, A., Wong, S., Young, R.: ICDAR 2003 robust reading competitions. In: ICDAR (2003)"},{"key":"13_CR39","unstructured":"Ma, J.: RRPN in pytorch. https:\/\/github.com\/mjq11302010044\/RRPNpytorch (2019)"},{"issue":"11","key":"13_CR40","doi-asserted-by":"publisher","first-page":"3111","DOI":"10.1109\/TMM.2018.2818020","volume":"20","author":"J Ma","year":"2018","unstructured":"Ma, J., et al.: Arbitrary-oriented scene text detection via rotation proposals. IEEE Trans. Multimedia 20(11), 3111\u20133122 (2018)","journal-title":"IEEE Trans. Multimedia"},{"key":"13_CR41","doi-asserted-by":"crossref","unstructured":"Mishra, A., Alahari, K., Jawahar, C.: Scene text recognition using higher order language priors. In: BMVC (2012)","DOI":"10.5244\/C.26.127"},{"key":"13_CR42","unstructured":"Nagendar, G., Singh, D., Balasubramanian, V.N., Jawahar, C.: Neuro-IoU: learning a surrogate loss for semantic segmentation. In: BMVC (2018)"},{"key":"13_CR43","doi-asserted-by":"crossref","unstructured":"Nayef, N., et al.: ICDAR 2019 robust reading challenge on multi-lingual scene text detection and recognition\u2013RRC-MLT-2019. arXiv preprint arXiv:1907.00945 (2019)","DOI":"10.1109\/ICDAR.2019.00254"},{"key":"13_CR44","unstructured":"Patel, Y., Appalaraju, S., Manmatha, R.: Deep perceptual compression. arXiv preprint arXiv:1907.08310 (2019)"},{"key":"13_CR45","unstructured":"Patel, Y., Appalaraju, S., Manmatha, R.: Hierarchical auto-regressive model for image compression incorporating object saliency and a deep perceptual loss. arXiv preprint arXiv:2002.04988 (2020)"},{"key":"13_CR46","doi-asserted-by":"crossref","unstructured":"Prabhavalkar, R., et al.: Minimum word error rate training for attention-based sequence-to-sequence models. In: ICASSP (2018)","DOI":"10.1109\/ICASSP.2018.8461809"},{"key":"13_CR47","doi-asserted-by":"crossref","unstructured":"Quy Phan, T., Shivakumara, P., Tian, S., Lim Tan, C.: Recognizing text with perspective distortion in natural scenes. In: ICCV (2013)","DOI":"10.1109\/ICCV.2013.76"},{"key":"13_CR48","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-50835-1_22","volume-title":"Advances in Visual Computing","author":"MA Rahman","year":"2016","unstructured":"Rahman, M.A., Wang, Y.: Optimizing intersection-over-union in deep neural networks for image segmentation. In: Bebis, G. (ed.) ISVC 2016. LNCS, vol. 10072, pp. 234\u2013244. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-50835-1_22"},{"key":"13_CR49","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: unified, real-time object detection. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"13_CR50","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: Towards real-time object detection with region proposal networks. In: NeurIPS (2015)"},{"key":"13_CR51","doi-asserted-by":"crossref","unstructured":"Rezatofighi, H., Tsoi, N., Gwak, J., Sadeghian, A., Reid, I., Savarese, S.: Generalized intersection over union: a metric and a loss for bounding box regression. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00075"},{"issue":"18","key":"13_CR52","doi-asserted-by":"publisher","first-page":"8027","DOI":"10.1016\/j.eswa.2014.07.008","volume":"41","author":"A Risnumawan","year":"2014","unstructured":"Risnumawan, A., Shivakumara, P., Chan, C.S., Tan, C.L.: A robust arbitrary text detection system for natural scene images. Expert Syst. Appl. 41(18), 8027\u20138048 (2014)","journal-title":"Expert Syst. Appl."},{"key":"13_CR53","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1038\/323533a0","volume":"323","author":"DE Rumelhart","year":"1986","unstructured":"Rumelhart, D.E., Hinton, G.E., Williams, R.J.: Learning representations by back-propagating errors. Nature 323, 533\u2013536 (1986)","journal-title":"Nature"},{"key":"13_CR54","unstructured":"Ryoo, M.S., Piergiovanni, A., Tan, M., Angelova, A.: Assemblenet: searching for multi-stream neural connectivity in video architectures. In: NeurIPS (2019)"},{"key":"13_CR55","doi-asserted-by":"crossref","unstructured":"Shi, B., Wang, X., Lyu, P., Yao, C., Bai, X.: Robust scene text recognition with automatic rectification. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.452"},{"key":"13_CR56","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"13_CR57","unstructured":"Song, Y., Schwing, A., Urtasun, R., et al.: Training deep neural networks via direct loss minimization. In: ICML (2016)"},{"key":"13_CR58","unstructured":"Wang, K., Babenko, B., Belongie, S.: End-to-end scene text recognition. In: ICCV (2011)"},{"key":"13_CR59","unstructured":"Wang, Z., Simoncelli, E.P., Bovik, A.C.: Multiscale structural similarity for image quality assessment. In: ACSSC (2003)"},{"key":"13_CR60","doi-asserted-by":"crossref","unstructured":"Xia, G.S., et al.: DOTA: a large-scale dataset for object detection in aerial images. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00418"},{"key":"13_CR61","unstructured":"Xu, B., Wang, N., Chen, T., Li, M.: Empirical evaluation of rectified activations in convolutional network. CoRR (2015)"},{"key":"13_CR62","doi-asserted-by":"crossref","unstructured":"Yu, J., Jiang, Y., Wang, Z., Cao, Z., Huang, T.: Unitbox: an advanced object detection network. In: ACM MM (2016)","DOI":"10.1145\/2964284.2967274"},{"key":"13_CR63","unstructured":"Zeiler, M.D.: ADADELTA: an adaptive learning rate method. CoRR (2012)"},{"key":"13_CR64","unstructured":"Zhang, X., Zhao, J.J., LeCun, Y.: Character-level convolutional networks for text classification. In: NeurIPS (2015)"},{"key":"13_CR65","unstructured":"Zoph, B., Le, Q.V.: Neural architecture search with reinforcement learning. arXiv preprint arXiv:1611.01578 (2016)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58577-8_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,23]],"date-time":"2024-09-23T00:04:14Z","timestamp":1727049854000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58577-8_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585761","9783030585778"],"references-count":65,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58577-8_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"24 September 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}