{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T18:33:16Z","timestamp":1770834796449,"version":"3.50.1"},"publisher-location":"Cham","reference-count":89,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031198298","type":"print"},{"value":"9783031198304","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-19830-4_24","type":"book-chapter","created":{"date-parts":[[2022,10,21]],"date-time":"2022-10-21T16:21:10Z","timestamp":1666369270000},"page":"413-430","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":27,"title":["EclipSE: Efficient Long-Range Video Retrieval Using Sight and\u00a0Sound"],"prefix":"10.1007","author":[{"given":"Yan-Bo","family":"Lin","sequence":"first","affiliation":[]},{"given":"Jie","family":"Lei","sequence":"additional","affiliation":[]},{"given":"Mohit","family":"Bansal","sequence":"additional","affiliation":[]},{"given":"Gedas","family":"Bertasius","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,22]]},"reference":[{"key":"24_CR1","doi-asserted-by":"crossref","unstructured":"Croitoru, I., et al.: Crossmodal generalized distillation for text-video retrieval. In ICCV, Teachtext (2021)","DOI":"10.1109\/ICCV48922.2021.01138"},{"key":"24_CR2","doi-asserted-by":"crossref","unstructured":"Liu, S., Fan, H., Qian, S., Chen, Y., Ding, W., Wang, Z.: Hierarchical transformer with momentum contrast for video-text retrieval. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01170"},{"key":"24_CR3","doi-asserted-by":"crossref","unstructured":"Bain, M., Nagrani, A., Varol, G., Zisserman, A.: Frozen in time: a joint video and image encoder for end-to-end retrieval. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00175"},{"key":"24_CR4","doi-asserted-by":"crossref","unstructured":"Wang, X., Zhu, L., Yang, Y.: T2VLAD: global-local sequence alignment for text-video retrieval. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00504"},{"key":"24_CR5","doi-asserted-by":"crossref","unstructured":"Wray, M., Doughty, H., Damen, D.: On semantic similarity in video retrieval. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00365"},{"key":"24_CR6","doi-asserted-by":"crossref","unstructured":"Gabeur, V., Sun, C., Alahari, K., Schmid, C.: Multi-modal transformer for video retrieval. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58548-8_13"},{"key":"24_CR7","doi-asserted-by":"crossref","unstructured":"Gabeur, V., Nagrani, A., Sun, C., Alahari, K., Schmid, C.: Masking modalities for cross-modal video retrieval. In: WACV (2022)","DOI":"10.1109\/WACV51458.2022.00217"},{"key":"24_CR8","unstructured":"Hu, X., et al.: Contrastive pre-training for zero-shot video-text understanding. In: EMNLP, VideoCLIP (2021)"},{"key":"24_CR9","doi-asserted-by":"crossref","unstructured":"Lei, J., et al.: ClipBERT for video-and-language learning via sparse sampling. 
In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00725"},{"key":"24_CR10","unstructured":"Dong, J., Li, X., Snoek, C.G.M.: Word2VisualVec: image and video to sentence matching by visual feature prediction. arXiv Preprint (2016)"},{"key":"24_CR11","doi-asserted-by":"crossref","unstructured":"Xu, R., Xiong, C., Chen, W., Corso, J.: Jointly modeling deep video and compositional text to bridge vision and language in a unified framework. In: AAAI (2015)","DOI":"10.1609\/aaai.v29i1.9512"},{"key":"24_CR12","unstructured":"Kiros, R., Salakhutdinov, R., Zemel, R.S.: Unifying visual-semantic embeddings with multimodal neural language models. arXiv Preprint (2014)"},{"key":"24_CR13","unstructured":"Fang, H., Xiong, P., Xu, L., Chen, Y.: Clip2Video: Mastering video-text retrieval via image clip. arXiv Preprint (2021)"},{"key":"24_CR14","unstructured":"Gao, Z., et al.: CLIP2TV: an empirical study on transformer-based methods for video-text retrieval. arXiv Preprint (2021)"},{"key":"24_CR15","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: ICML (2021)"},{"key":"24_CR16","doi-asserted-by":"crossref","unstructured":"Luo, H., et al.: CLIP4Clip: an empirical study of clip for end to end video clip retrieval. arXiv Preprint (2021)","DOI":"10.1016\/j.neucom.2022.07.028"},{"key":"24_CR17","unstructured":"Nagrani, A., Yang, S., Arnab, A., Jansen, A., Schmid, C., Sun, C.: Attention bottlenecks for multimodal fusion. In: NeurIPS (2021)"},{"key":"24_CR18","unstructured":"Kazakos, E., Huh, J., Nagrani, A., Zisserman, A., Damen, D.: Multimodal egocentric action recognition. In: BMVC (2021)"},{"key":"24_CR19","doi-asserted-by":"crossref","unstructured":"Arandjelovi\u0107, R., Zisserman, A.: Objects that sound. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01246-5_27"},{"key":"24_CR20","doi-asserted-by":"crossref","unstructured":"Vasudevan, A.B., Dai, D., Van Gool, L.: Sound and visual representation learning with multiple pretraining tasks, In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01421"},{"key":"24_CR21","doi-asserted-by":"crossref","unstructured":"Afouras, T., Asano, Y.M., Fagan, F., Vedaldi, A., Metze, F.: Self-supervised object detection from audio-visual correspondence. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01032"},{"key":"24_CR22","doi-asserted-by":"crossref","unstructured":"Aytar, Y., Vondrick, C., Torralba, A.: Learning sound representations from unlabeled video. In: NeurIPS, SoundNet (2016)","DOI":"10.1109\/CVPR.2016.18"},{"key":"24_CR23","unstructured":"lwassel, H., Mahajan, D., Torresani, L., Ghanem, B., Tran, D.: Self-supervised learning by cross-modal audio-video clustering. In: NeurIPS (2020)"},{"key":"24_CR24","unstructured":"Lin, Y.-B., Tseng, H.-Y., Lee, H.-Y., Lin, Y.-Y., Yang, M.-H.: Exploring cross-video and cross-modality signals for weakly-supervised audio-visual video parsing. In: NeurIPS (2021)"},{"key":"24_CR25","doi-asserted-by":"crossref","unstructured":"Gao, R., Tae-Hyun, O., Grauman, K., Torresani, L.: Action recognition by previewing audio. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.01047"},{"key":"24_CR26","doi-asserted-by":"crossref","unstructured":"Krishna, R., Hata, K., Ren, F., Fei-Fei, L., Carlos Niebles, J.: Dense-captioning events in videos. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.83"},{"key":"24_CR27","unstructured":"Lei, J., Berg, T.L., Bansal, M.: QVHighlights: detecting moments and highlights in videos via natural language queries. 
In: NeurIPS (2021)"},{"key":"24_CR28","doi-asserted-by":"crossref","unstructured":"Hendricks, L.A., Wang, O., Shechtman, E., Sivic, J., Darrell, T., Russell, B.: Localizing moments in video with natural language. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.618"},{"key":"24_CR29","doi-asserted-by":"crossref","unstructured":"Zhou, L., Xu, C., Corso, J.J.: Towards automatic learning of procedures from web instructional videos. In: AAAI (2018)","DOI":"10.1609\/aaai.v32i1.12342"},{"key":"24_CR30","doi-asserted-by":"crossref","unstructured":"Sigurdsson, G.A., Varol, G., Wang, X., Farhadi, A., Laptev, A., Gupta, A.: Hollywood in homes: crowdsourcing data collection for activity understanding. In: ECCV (2016)","DOI":"10.1007\/978-3-319-46448-0_31"},{"key":"24_CR31","unstructured":"Patrick, M., et al.: Support-set bottlenecks for video-text representation learning. In: ICLR (2021)"},{"key":"24_CR32","doi-asserted-by":"crossref","unstructured":"Amrani, E., Ben-Ari, R., Rotman, D., Bronstein, A.: Noise estimation using density estimation for self-supervised multimodal learning. In: AAAI (2020)","DOI":"10.1609\/aaai.v35i8.16822"},{"key":"24_CR33","doi-asserted-by":"crossref","unstructured":"Miech, A., Alayrac, J.-P., Smaira, L., Laptev, I., Sivic, J., Zisserman, A.: End-to-end learning of visual representations from uncurated instructional videos. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00990"},{"key":"24_CR34","doi-asserted-by":"crossref","unstructured":"Wray, M., Larlus, D., Csurka, G., Damen, D.: Fine-grained action retrieval through multiple parts-of-speech embeddings. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00054"},{"key":"24_CR35","doi-asserted-by":"crossref","unstructured":"Ge, Y., et al.: Bridging video-text retrieval with maultiple choice questions. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01569"},{"key":"24_CR36","unstructured":"Haoyu, L., Fei, N., Huo, Y., Gao, Y., Zhiwu, L., Rong Wen, J.: Collaborative two-stream vision-language pre-training model for cross-modal retrieval. In: CVPR (2022)"},{"key":"24_CR37","doi-asserted-by":"crossref","unstructured":"Wang, J., et al.: Object-aware video-language pre-training for retrieval. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00331"},{"key":"24_CR38","doi-asserted-by":"crossref","unstructured":"Zhu, L., Yang, Y.: ActBERT: learning global-local video-text representations. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00877"},{"key":"24_CR39","doi-asserted-by":"crossref","unstructured":"Li, L., et al.: Hierarchical encoder for Video+Language omni-representation pre-training. In: EMNLP (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.161"},{"key":"24_CR40","doi-asserted-by":"crossref","unstructured":"Yu, Y., Kim, J., Kim, G.: A joint sequence fusion model for video question answering and retrieval. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01234-2_29"},{"key":"24_CR41","doi-asserted-by":"crossref","unstructured":"Yu, Y., Ko, H., Choi, J., Kim, G.: End-to-end concept word detection for video captioning, retrieval, and question answering. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.347"},{"key":"24_CR42","unstructured":"Luo, H., et al.: UniVL: a unified video and language pre-training model for multimodal understanding and generation. arXiv Preprint (2020)"},{"key":"24_CR43","unstructured":"Liu, Y., Albanie, S., Nagrani, A., Zisserman, A.: Video retrieval using representations from collaborative experts. 
In: BMVC (2019)"},{"key":"24_CR44","doi-asserted-by":"crossref","unstructured":"Mithun, N.C., Li, J., Metze, F., Roy-Chowdhury, A.M.: Learning joint embedding with multimodal cues for cross-modal video-text retrieval. In: ICMR (2018)","DOI":"10.1145\/3206025.3206064"},{"key":"24_CR45","unstructured":"Miech, A., Laptev, I., Sivic, J.: Learning a text-video embedding from incomplete and heterogeneous data. arXiv Preprint (2018)"},{"key":"24_CR46","unstructured":"Cheng, X., Lin, H., Wu, X., Yang, F., Shen, D.: Improving video-text retrieval by multi-stream corpus alignment and dual SoftMax loss. arXiv Preprint (2021)"},{"key":"24_CR47","doi-asserted-by":"crossref","unstructured":"Wang, Z., Wu, Y., Narasimhan, K., Russakovsky, O.: Multi-query video retrieval. arXiv Preprint (2022)","DOI":"10.1007\/978-3-031-19781-9_14"},{"key":"24_CR48","doi-asserted-by":"crossref","unstructured":"Dzabraev, M., Kalashnikov, M., Komkov, S., Petiushko. A.: Multidomain multimodal transformer for video retrieval. In: CVPRW (2021)","DOI":"10.1109\/CVPRW53098.2021.00374"},{"key":"24_CR49","doi-asserted-by":"crossref","unstructured":"Portillo-Quintero, J.A., Ortiz-Bayliss, J.C., Terashima-Mar\u00edn, H.: A straightforward framework for video retrieval using clip. In: MCPR (2021)","DOI":"10.1007\/978-3-030-77004-4_1"},{"key":"24_CR50","doi-asserted-by":"crossref","unstructured":"Gorti, S.K., et al.: Cross-modal language-video attention for text-video retrieval. In CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00495"},{"key":"24_CR51","unstructured":"Bain, M., Nagrani, A., Varol, G., Zisserman, A.: A clip-hitchhiker\u2019s guide to long video retrieval. arXiv Preprint (2022)"},{"key":"24_CR52","doi-asserted-by":"crossref","unstructured":"Owens, A., Efros, A.A.: Audio-visual scene analysis with self-supervised multisensory features. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01231-1_39"},{"key":"24_CR53","unstructured":"Korbar, B., Tran, D., Torresani, L.: Cooperative learning of audio and video models from self-supervised synchronization. In: NeurIPS (2018)"},{"key":"24_CR54","doi-asserted-by":"crossref","unstructured":"Arandjelovic, R., Zisserman, A.: Look, listen and learn. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.73"},{"key":"24_CR55","doi-asserted-by":"crossref","unstructured":"Owens, A., Jiajun, W., McDermott, J.H., Freeman, W.T., Torralba, A.: Ambient sound provides supervision for visual learning. In: ECCV (2016)","DOI":"10.1007\/978-3-319-46448-0_48"},{"key":"24_CR56","unstructured":"Asano, Y.M., Patrick, M., Rupprecht, C., Vedaldi, A.: Labelling unlabelled videos from scratch with multi-modal self-supervision. In: NeurIPS (2020)"},{"key":"24_CR57","unstructured":"Ma, S., Zeng, Z., McDuff, D., Song, Y.: Active contrastive learning of audio-visual video representations. In: ICLR (2021)"},{"key":"24_CR58","doi-asserted-by":"crossref","unstructured":"Morgado, P., Vasconcelos, N., Misra, I.: Audio-visual instance discrimination with cross-modal agreement. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01229"},{"key":"24_CR59","doi-asserted-by":"crossref","unstructured":"Morgado, P., Misra, I., Vasconcelos, N.: Robust audio-visual instance discrimination. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01274"},{"key":"24_CR60","doi-asserted-by":"crossref","unstructured":"Lin, Y.-B., Li, Y.-J., Wang, Y.-G.F.: Dual-modality seq2seq network for audio-visual event localization. 
In: ICASSP (2019)","DOI":"10.1109\/ICASSP.2019.8683226"},{"key":"24_CR61","unstructured":"Ma, S., Zeng, Z., McDuff, D., Song, Y.: Contrastive learning of global and local video representations. In: NeurIPS (2021)"},{"key":"24_CR62","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Doughty, H., Shao, L., Snoek, C.G.M.: Audio-adaptive activity recognition across video domains. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01342"},{"key":"24_CR63","unstructured":"Dosovitskiy, D., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: ICLR (2021)"},{"key":"24_CR64","doi-asserted-by":"crossref","unstructured":"Lin, Y.-B., Wang, Y.-C.F.: Audiovisual transformer with instance attention for audio-visual event localization. In: ACCV (2020)","DOI":"10.1007\/978-3-030-69544-6_17"},{"key":"24_CR65","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NeurIPS (2017)"},{"key":"24_CR66","doi-asserted-by":"crossref","unstructured":"Shvetsova, N., et al.: Everything at once-multi-modal fusion transformer for video retrieval. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01939"},{"key":"24_CR67","doi-asserted-by":"crossref","unstructured":"Zellers, R., et al.: Neural script knowledge through vision and language and sound. In CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01589"},{"key":"24_CR68","unstructured":"Akbari, H., et al.: VATT: transformers for multimodal self-supervised learning from raw video, audio and text. In: NeurIPS (2021)"},{"key":"24_CR69","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Hessel, J., Yu, Y., Lu, X., Zellers, R., Choi, Y.: Connecting the dots between audio and text without parallel data through visual knowledge transfer. arXiv Preprint (2021)","DOI":"10.18653\/v1\/2022.naacl-main.333"},{"key":"24_CR70","unstructured":"Alayrac, J.-B., et al.: Self-supervised multimodal versatile networks. In: NeurIPS (2020)"},{"key":"24_CR71","doi-asserted-by":"crossref","unstructured":"Chen, B., et al.: Multimodal clustering networks for self-supervised learning from unlabeled videos. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00791"},{"key":"24_CR72","doi-asserted-by":"crossref","unstructured":"Lin, Y.-B., Frank Wang, Y-C.: Exploiting audio-visual consistency with partial supervision for spatial audio generation. In: AAAI (2021)","DOI":"10.1609\/aaai.v35i3.16302"},{"key":"24_CR73","unstructured":"Wang, S., et al.: Self-attention with linear complexity. arXiv Preprint, Linformer (2020)"},{"key":"24_CR74","unstructured":"Choromanski, K.M.: Rethinking attention with performers. In: ICLR (2021)"},{"key":"24_CR75","unstructured":"Patrick, M., et al.: Trajectory attention in video transformers. In: NeurIPS (2021)"},{"key":"24_CR76","unstructured":"Bertasius, G., Wang, H., Torresani, L.: Is space-time attention all you need for video understanding? In: ICML (2021)"},{"key":"24_CR77","doi-asserted-by":"crossref","unstructured":"Gemmeke, J.F., et al.: Audio set: an ontology and human-labeled dataset for audio events. In: ICASSP (2017)","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"24_CR78","doi-asserted-by":"crossref","unstructured":"Hershey, S., et al.: CNN architectures for large-scale audio classification. In: ICASSP (2017)","DOI":"10.1109\/ICASSP.2017.7952132"},{"key":"24_CR79","doi-asserted-by":"crossref","unstructured":"Chen, H., Xie, W., Vedaldi, A., Zisserman, A.: A large-scale audio-visual dataset. 
In: ICASSP (2020)","DOI":"10.1109\/ICASSP40776.2020.9053174"},{"key":"24_CR80","doi-asserted-by":"crossref","unstructured":"Gong, Y., Chung, Y.-A., Glass, J.: Audio spectrogram transformer. In INTEERSPEECH, AST (2021)","DOI":"10.21437\/Interspeech.2021-698"},{"key":"24_CR81","doi-asserted-by":"crossref","unstructured":"Gong, Y., Chung, Y.-A., Glass, J.: Improving audio tagging with pretraining, sampling, labeling, and aggregation. In: TASLP (2021)","DOI":"10.1109\/TASLP.2021.3120633"},{"key":"24_CR82","unstructured":"Paszke, A., et al.: Pytorch: an imperative style, high-performance deep learning library. In: NeurIPS (2019)"},{"key":"24_CR83","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"24_CR84","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: ICLR (2015)"},{"key":"24_CR85","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"385","DOI":"10.1007\/978-3-030-01261-8_23","volume-title":"Computer Vision \u2013 ECCV 2018","author":"B Zhang","year":"2018","unstructured":"Zhang, B., Hu, H., Sha, F.: Cross-modal and hierarchical modeling of video and text. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11217, pp. 385\u2013401. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01261-8_23"},{"key":"24_CR86","unstructured":"Chen, X., et al.: Data collection and evaluation server. arXiv Preprint, Microsoft coco captions (2015)"},{"key":"24_CR87","doi-asserted-by":"crossref","unstructured":"Krishna, R., et al.: Visual genome: connecting language and vision using crowdsourced dense image annotations. In: IJCV (2017)","DOI":"10.1007\/s11263-016-0981-7"},{"key":"24_CR88","doi-asserted-by":"crossref","unstructured":"Miech, A., et al.: Howto100m: Learning a text-video embedding by watching hundred million narrated video clips. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00272"},{"key":"24_CR89","doi-asserted-by":"crossref","unstructured":"Sharma, P., Ding, N., Goodman, S., Soricut, R.: Conceptual captions: A cleaned, hypernymed, image alt-text dataset for automatic image captioning. 
In: ACL (2018)","DOI":"10.18653\/v1\/P18-1238"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-19830-4_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,6]],"date-time":"2024-10-06T09:20:25Z","timestamp":1728206425000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-19830-4_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031198298","9783031198304"],"references-count":89,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-19830-4_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"22 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference 
organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}
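
The block above is the "message" envelope returned by the public Crossref REST API for this chapter's DOI. A minimal sketch for re-fetching it, assuming network access and only the Python standard library (the endpoint https://api.crossref.org/works/{DOI} and the field names used below all correspond to the payload shown above):

    import json
    import urllib.request

    # DOI of the chapter described in the record above
    DOI = "10.1007/978-3-031-19830-4_24"
    url = f"https://api.crossref.org/works/{DOI}"

    with urllib.request.urlopen(url) as resp:
        record = json.load(resp)      # same envelope as above: status, message-type, message, ...
        work = record["message"]      # the work metadata itself

    print(work["title"][0])                 # chapter title
    print(work["type"], work["publisher"])  # book-chapter, Springer Nature Switzerland
    print(work["references-count"])         # 89 deposited references

This is only a sketch of how such a record is obtained; the bibliographic and peer-review fields it returns are exactly those deposited by the publisher, as reproduced above.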