{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:09:04Z","timestamp":1750219744682,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,21]],"date-time":"2023-10-21T00:00:00Z","timestamp":1697846400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"the National Natural Science Foundation of China","award":["No. 62272467 and No. 61832017"],"award-info":[{"award-number":["No. 62272467 and No. 61832017"]}]},{"name":"Beijing Outstanding Young Scientist Program","award":["No. BJJWZYJH012019100020098"],"award-info":[{"award-number":["No. BJJWZYJH012019100020098"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,21]]},"DOI":"10.1145\/3583780.3615107","type":"proceedings-article","created":{"date-parts":[[2023,10,21]],"date-time":"2023-10-21T07:45:42Z","timestamp":1697874342000},"page":"3104-3113","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["VILE: Block-Aware Visual Enhanced Document Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-6855-6558","authenticated-orcid":false,"given":"Huaying","family":"Yuan","sequence":"first","affiliation":[{"name":"Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9781-948X","authenticated-orcid":false,"given":"Zhicheng","family":"Dou","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3530-3787","authenticated-orcid":false,"given":"Yujia","family":"Zhou","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8075-6021","authenticated-orcid":false,"given":"Yu","family":"Guo","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9777-9676","authenticated-orcid":false,"given":"Ji-Rong","family":"Wen","sequence":"additional","affiliation":[{"name":"Engineering Research Center of Next-Generation Intelligent Search and Recommendation, Ministry of Education &amp; Renmin University of China, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Pre-training tasks for embedding-based large-scale retrieval. arXiv preprint arXiv:2002.03932","author":"Chang Wei-Cheng","year":"2020","unstructured":"Wei-Cheng Chang , Felix X Yu , Yin-Wen Chang , Yiming Yang , and Sanjiv Kumar . 2020. Pre-training tasks for embedding-based large-scale retrieval. arXiv preprint arXiv:2002.03932 ( 2020 ). Wei-Cheng Chang, Felix X Yu, Yin-Wen Chang, Yiming Yang, and Sanjiv Kumar. 2020. Pre-training tasks for embedding-based large-scale retrieval. arXiv preprint arXiv:2002.03932 (2020)."},{"key":"e_1_3_2_1_2_1","volume-title":"Electra: Pre-training text encoders as discriminators rather than generators. arXiv preprint arXiv:2003.10555","author":"Clark Kevin","year":"2020","unstructured":"Kevin Clark , Minh-Thang Luong , Quoc V Le , and Christopher D Manning . 2020 . Electra: Pre-training text encoders as discriminators rather than generators. arXiv preprint arXiv:2003.10555 (2020). Kevin Clark, Minh-Thang Luong, Quoc V Le, and Christopher D Manning. 2020. Electra: Pre-training text encoders as discriminators rather than generators. arXiv preprint arXiv:2003.10555 (2020)."},{"key":"e_1_3_2_1_3_1","volume-title":"TREC 2014 web track overview. Technical Report. MICHIGAN UNIV ANN ARBOR.","author":"Collins-Thompson Kevyn","year":"2015","unstructured":"Kevyn Collins-Thompson , Craig Macdonald , Paul Bennett , Fernando Diaz , and Ellen M Voorhees . 2015 . TREC 2014 web track overview. Technical Report. MICHIGAN UNIV ANN ARBOR. Kevyn Collins-Thompson, Craig Macdonald, Paul Bennett, Fernando Diaz, and Ellen M Voorhees. 2015. TREC 2014 web track overview. Technical Report. MICHIGAN UNIV ANN ARBOR."},{"key":"e_1_3_2_1_4_1","volume-title":"Overview of the TREC 2019 deep learning track. arXiv preprint arXiv:2003","author":"Craswell Nick","year":"2020","unstructured":"Nick Craswell , Bhaskar Mitra , Emine Yilmaz , Daniel Campos , and Ellen M Voorhees . 2020 . Overview of the TREC 2019 deep learning track. arXiv preprint arXiv:2003 .07820 (2020). Nick Craswell, Bhaskar Mitra, Emine Yilmaz, Daniel Campos, and Ellen M Voorhees. 2020. Overview of the TREC 2019 deep learning track. arXiv preprint arXiv:2003.07820 (2020)."},{"key":"e_1_3_2_1_5_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin , Ming-Wei Chang , Kenton Lee , and Kristina Toutanova . 2018 . Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018). Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_6_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly etal 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020).  Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132847.3132943"},{"key":"e_1_3_2_1_8_1","volume-title":"Condenser: a pre-training architecture for dense retrieval. arXiv preprint arXiv:2104.08253","author":"Gao Luyu","year":"2021","unstructured":"Luyu Gao and Jamie Callan . 2021a. Condenser: a pre-training architecture for dense retrieval. arXiv preprint arXiv:2104.08253 ( 2021 ). Luyu Gao and Jamie Callan. 2021a. Condenser: a pre-training architecture for dense retrieval. arXiv preprint arXiv:2104.08253 (2021)."},{"key":"e_1_3_2_1_9_1","volume-title":"Unsupervised corpus aware language model pre-training for dense passage retrieval. arXiv preprint arXiv:2108.05540","author":"Gao Luyu","year":"2021","unstructured":"Luyu Gao and Jamie Callan . 2021b. Unsupervised corpus aware language model pre-training for dense passage retrieval. arXiv preprint arXiv:2108.05540 ( 2021 ). Luyu Gao and Jamie Callan. 2021b. Unsupervised corpus aware language model pre-training for dense passage retrieval. arXiv preprint arXiv:2108.05540 (2021)."},{"key":"e_1_3_2_1_10_1","volume-title":"COIL: Revisit exact lexical match in information retrieval with contextualized inverted list. arXiv preprint arXiv:2104.07186","author":"Gao Luyu","year":"2021","unstructured":"Luyu Gao , Zhuyun Dai , and Jamie Callan . 2021 . COIL: Revisit exact lexical match in information retrieval with contextualized inverted list. arXiv preprint arXiv:2104.07186 (2021). Luyu Gao, Zhuyun Dai, and Jamie Callan. 2021. COIL: Revisit exact lexical match in information retrieval with contextualized inverted list. arXiv preprint arXiv:2104.07186 (2021)."},{"volume-title":"Visual attention and web design","author":"Grier Rebecca A","key":"e_1_3_2_1_11_1","unstructured":"Rebecca A Grier . 2004. Visual attention and web design . University of Cincinnati . Rebecca A Grier. 2004. Visual attention and web design. University of Cincinnati."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"e_1_3_2_1_13_1","volume-title":"Sewon Min, Patrick Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih.","author":"Karpukhin Vladimir","year":"2020","unstructured":"Vladimir Karpukhin , Barlas Oug uz , Sewon Min, Patrick Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih. 2020 . Dense passage retrieval for open-domain question answering. arXiv preprint arXiv:2004.04906 (2020). Vladimir Karpukhin, Barlas Oug uz, Sewon Min, Patrick Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih. 2020. Dense passage retrieval for open-domain question answering. arXiv preprint arXiv:2004.04906 (2020)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1080\/0144929X.2019.1602167"},{"key":"e_1_3_2_1_15_1","volume-title":"A method for stochastic optimization. arXiv","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma , J Adam Ba , and J Adam . 2020. A method for stochastic optimization. arXiv 2014 . arXiv preprint arXiv:1412.6980, Vol. 106 (2020). Diederik P Kingma, J Adam Ba, and J Adam. 2020. A method for stochastic optimization. arXiv 2014. arXiv preprint arXiv:1412.6980, Vol. 106 (2020)."},{"key":"e_1_3_2_1_16_1","volume-title":"Visualbert: A simple and performant baseline for vision and language. arXiv preprint arXiv:1908.03557","author":"Li Liunian Harold","year":"2019","unstructured":"Liunian Harold Li , Mark Yatskar , Da Yin , Cho-Jui Hsieh , and Kai-Wei Chang . 2019 . Visualbert: A simple and performant baseline for vision and language. arXiv preprint arXiv:1908.03557 (2019). Liunian Harold Li, Mark Yatskar, Da Yin, Cho-Jui Hsieh, and Kai-Wei Chang. 2019. Visualbert: A simple and performant baseline for vision and language. arXiv preprint arXiv:1908.03557 (2019)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3471158.3472245"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463238"},{"key":"e_1_3_2_1_19_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu , Myle Ott , Naman Goyal , Jingfei Du , Mandar Joshi , Danqi Chen , Omer Levy , Mike Lewis , Luke Zettlemoyer , and Veselin Stoyanov . 2019 . Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019). Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_20_1","volume-title":"Retromae: Pre-training retrieval-oriented transformers via masked auto-encoder. arXiv preprint arXiv:2205.12035","author":"Liu Zheng","year":"2022","unstructured":"Zheng Liu and Yingxia Shao . 2022 . Retromae: Pre-training retrieval-oriented transformers via masked auto-encoder. arXiv preprint arXiv:2205.12035 (2022). Zheng Liu and Yingxia Shao. 2022. Retromae: Pre-training retrieval-oriented transformers via masked auto-encoder. arXiv preprint arXiv:2205.12035 (2022)."},{"key":"e_1_3_2_1_21_1","volume-title":"Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. Advances in neural information processing systems","author":"Lu Jiasen","year":"2019","unstructured":"Jiasen Lu , Dhruv Batra , Devi Parikh , and Stefan Lee . 2019 . Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. Advances in neural information processing systems , Vol. 32 (2019). Jiasen Lu, Dhruv Batra, Devi Parikh, and Stefan Lee. 2019. Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3409256.3409829"},{"key":"e_1_3_2_1_23_1","volume-title":"Information retrieval from documents: A survey. Information retrieval","author":"Mitra Mandar","year":"2000","unstructured":"Mandar Mitra and BB Chaudhuri . 2000. Information retrieval from documents: A survey. Information retrieval , Vol. 2 ( 2000 ), 141--163. Mandar Mitra and BB Chaudhuri. 2000. Information retrieval from documents: A survey. Information retrieval, Vol. 2 (2000), 141--163."},{"key":"e_1_3_2_1_24_1","volume-title":"MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. In NIPS","author":"Nguyen Tri","year":"2016","unstructured":"Tri Nguyen , Mir Rosenberg , Xia Song , 2016 . MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. In NIPS 2016. Tri Nguyen, Mir Rosenberg, Xia Song, et al. 2016. MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. In NIPS 2016."},{"key":"e_1_3_2_1_25_1","volume-title":"Document expansion by query prediction. arXiv preprint arXiv:1904.08375","author":"Nogueira Rodrigo","year":"2019","unstructured":"Rodrigo Nogueira , Wei Yang , Jimmy Lin , and Kyunghyun Cho . 2019. Document expansion by query prediction. arXiv preprint arXiv:1904.08375 ( 2019 ). Rodrigo Nogueira, Wei Yang, Jimmy Lin, and Kyunghyun Cho. 2019. Document expansion by query prediction. arXiv preprint arXiv:1904.08375 (2019)."},{"key":"e_1_3_2_1_26_1","volume-title":"Imagebert: Cross-modal pre-training with large-scale weak-supervised image-text data. arXiv preprint arXiv:2001.07966","author":"Qi Di","year":"2020","unstructured":"Di Qi , Lin Su , Jia Song , Edward Cui , Taroon Bharti , and Arun Sacheti . 2020 . Imagebert: Cross-modal pre-training with large-scale weak-supervised image-text data. arXiv preprint arXiv:2001.07966 (2020). Di Qi, Lin Su, Jia Song, Edward Cui, Taroon Bharti, and Arun Sacheti. 2020. Imagebert: Cross-modal pre-training with large-scale weak-supervised image-text data. arXiv preprint arXiv:2001.07966 (2020)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-009-9123-y"},{"key":"e_1_3_2_1_28_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford , Jong Wook Kim , Chris Hallacy , Aditya Ramesh , Gabriel Goh , Sandhini Agarwal , Girish Sastry , Amanda Askell , Pamela Mishkin , Jack Clark , 2021 . Learning transferable visual models from natural language supervision . In International conference on machine learning. PMLR, 8748--8763 . Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000019"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.472"},{"key":"e_1_3_2_1_31_1","volume-title":"Vl-bert: Pre-training of generic visual-linguistic representations. arXiv preprint arXiv:1908.08530","author":"Su Weijie","year":"2019","unstructured":"Weijie Su , Xizhou Zhu , Yue Cao , Bin Li , Lewei Lu , Furu Wei , and Jifeng Dai . 2019 . Vl-bert: Pre-training of generic visual-linguistic representations. arXiv preprint arXiv:1908.08530 (2019). Weijie Su, Xizhou Zhu, Yue Cao, Bin Li, Lewei Lu, Furu Wei, and Jifeng Dai. 2019. Vl-bert: Pre-training of generic visual-linguistic representations. arXiv preprint arXiv:1908.08530 (2019)."},{"key":"e_1_3_2_1_32_1","volume-title":"International handbook of internet research","author":"Thorlacius Lisbeth","year":"2010","unstructured":"Lisbeth Thorlacius . 2010. Visual communication in web design--analyzing visual communication in web design . International handbook of internet research ( 2010 ), 455--476. Lisbeth Thorlacius. 2010. Visual communication in web design--analyzing visual communication in web design. International handbook of internet research (2010), 455--476."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-021-09398-0"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Bram van den Akker Ilya Markov and Maarten de Rijke. 2019. ViTOR: learning to rank webpages based on visual features. In The world wide web conference. 3279--3285.  Bram van den Akker Ilya Markov and Maarten de Rijke. 2019. ViTOR: learning to rank webpages based on visual features. In The world wide web conference. 3279--3285.","DOI":"10.1145\/3308558.3313419"},{"key":"e_1_3_2_1_35_1","volume-title":"Approximate nearest neighbor negative contrastive learning for dense text retrieval. arXiv preprint arXiv:2007.00808","author":"Xiong Lee","year":"2020","unstructured":"Lee Xiong , Chenyan Xiong , Ye Li , Kwok-Fung Tang , Jialin Liu , Paul Bennett , Junaid Ahmed , and Arnold Overwijk . 2020. Approximate nearest neighbor negative contrastive learning for dense text retrieval. arXiv preprint arXiv:2007.00808 ( 2020 ). Lee Xiong, Chenyan Xiong, Ye Li, Kwok-Fung Tang, Jialin Liu, Paul Bennett, Junaid Ahmed, and Arnold Overwijk. 2020. Approximate nearest neighbor negative contrastive learning for dense text retrieval. arXiv preprint arXiv:2007.00808 (2020)."},{"key":"e_1_3_2_1_36_1","volume-title":"Simple applications of BERT for ad hoc document retrieval. arXiv preprint arXiv:1903.10972","author":"Yang Wei","year":"2019","unstructured":"Wei Yang , Haotian Zhang , and Jimmy Lin . 2019. Simple applications of BERT for ad hoc document retrieval. arXiv preprint arXiv:1903.10972 ( 2019 ). Wei Yang, Haotian Zhang, and Jimmy Lin. 2019. Simple applications of BERT for ad hoc document retrieval. arXiv preprint arXiv:1903.10972 (2019)."},{"key":"e_1_3_2_1_37_1","volume-title":"Large batch optimization for deep learning: Training bert in 76 minutes. arXiv preprint arXiv:1904.00962","author":"You Yang","year":"2019","unstructured":"Yang You , Jing Li , Sashank Reddi , Jonathan Hseu , Sanjiv Kumar , Srinadh Bhojanapalli , Xiaodan Song , James Demmel , Kurt Keutzer , and Cho-Jui Hsieh . 2019. Large batch optimization for deep learning: Training bert in 76 minutes. arXiv preprint arXiv:1904.00962 ( 2019 ). Yang You, Jing Li, Sashank Reddi, Jonathan Hseu, Sanjiv Kumar, Srinadh Bhojanapalli, Xiaodan Song, James Demmel, Kurt Keutzer, and Cho-Jui Hsieh. 2019. Large batch optimization for deep learning: Training bert in 76 minutes. arXiv preprint arXiv:1904.00962 (2019)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462880"},{"key":"e_1_3_2_1_39_1","volume-title":"Repbert: Contextualized text embeddings for first-stage retrieval. arXiv preprint arXiv:2006.15498","author":"Zhan Jingtao","year":"2020","unstructured":"Jingtao Zhan , Jiaxin Mao , Yiqun Liu , Min Zhang , and Shaoping Ma . 2020 . Repbert: Contextualized text embeddings for first-stage retrieval. arXiv preprint arXiv:2006.15498 (2020). Jingtao Zhan, Jiaxin Mao, Yiqun Liu, Min Zhang, and Shaoping Ma. 2020. Repbert: Contextualized text embeddings for first-stage retrieval. arXiv preprint arXiv:2006.15498 (2020)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3271673"}],"event":{"name":"CIKM '23: The 32nd ACM International Conference on Information and Knowledge Management","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Birmingham United Kingdom","acronym":"CIKM '23"},"container-title":["Proceedings of the 32nd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3583780.3615107","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3583780.3615107","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:42Z","timestamp":1750178202000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3583780.3615107"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,21]]},"references-count":40,"alternative-id":["10.1145\/3583780.3615107","10.1145\/3583780"],"URL":"https:\/\/doi.org\/10.1145\/3583780.3615107","relation":{},"subject":[],"published":{"date-parts":[[2023,10,21]]},"assertion":[{"value":"2023-10-21","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}