{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T06:01:28Z","timestamp":1743055288011,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":30,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819794300"},{"type":"electronic","value":"9789819794317"}],"license":[{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-9431-7_8","type":"book-chapter","created":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T14:03:04Z","timestamp":1730383384000},"page":"97-109","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["PqE: Zero-Shot Document Expansion for\u00a0Dense Retrieval with\u00a0Large Language Models"],"prefix":"10.1007","author":[{"given":"Jiyuan","family":"Liu","sequence":"first","affiliation":[]},{"given":"Dongsheng","family":"Zou","sequence":"additional","affiliation":[]},{"given":"Naiquan","family":"Chai","sequence":"additional","affiliation":[]},{"given":"Yuming","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Hao","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xinyi","family":"Song","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,1]]},"reference":[{"key":"8_CR1","doi-asserted-by":"crossref","unstructured":"Lee, K., Chang, M.W., Toutanova, K.: Latent retrieval for weakly supervised open domain question answering. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 6086\u20136096 (2019)","DOI":"10.18653\/v1\/P19-1612"},{"key":"8_CR2","doi-asserted-by":"crossref","unstructured":"Karpukhin, V., et al.: Dense passage retrieval for open-domain question answering. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 6769\u20136781 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"8_CR3","unstructured":"Izacard, G., et al.: Unsupervised dense information retrieval with contrastive learning. arXiv preprint arXiv:2112.09118 (2021)"},{"key":"8_CR4","doi-asserted-by":"crossref","unstructured":"Gao, L., Callan, J.: Unsupervised corpus aware language model pre-training for dense passage retrieval. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics, vol.\u00a01 (2022)","DOI":"10.18653\/v1\/2022.acl-long.203"},{"key":"8_CR5","unstructured":"Bajaj, P., et al.: MS MARCO: a human generated machine reading comprehension dataset. arXiv preprint arXiv:1611.09268 (2016)"},{"key":"8_CR6","unstructured":"Xiao, S., Liu, Z., Zhang, P., Muennighof, N.: C-Pack: packaged resources to advance general Chinese embedding. arXiv preprint arXiv:2309.07597 (2023)"},{"key":"8_CR7","unstructured":"Rocchio\u00a0Jr, J.J.: Relevance feedback in information retrieval. The SMART Retrieval System: Experiments in Automatic Document Processing (1971)"},{"key":"8_CR8","doi-asserted-by":"crossref","unstructured":"Lavrenko, V., Croft, W.B.: Relevance-based language models. In: ACM SIGIR Forum, vol.\u00a051, pp. 260\u2013267. ACM New York, NY, USA (2017)","DOI":"10.1145\/3130348.3130376"},{"key":"8_CR9","first-page":"2","volume":"6","author":"R Nogueira","year":"2019","unstructured":"Nogueira, R., Lin, J., Epistemic, A.: From doc2query to docTTTTTquery. Online Prep. 6, 2 (2019)","journal-title":"Online Prep."},{"key":"8_CR10","unstructured":"Nogueira, R., Yang, W., Lin, J., Cho, K.: Document expansion by query prediction. arXiv preprint arXiv:1904.08375 (2019)"},{"key":"8_CR11","doi-asserted-by":"crossref","unstructured":"Lv, Y., Zhai, C.: A comparative study of methods for estimating query language models with pseudo feedback. In: Proceedings of the 18th ACM Conference on Information and Knowledge Management, pp. 1895\u20131898 (2009)","DOI":"10.1145\/1645953.1646259"},{"key":"8_CR12","unstructured":"Brown, T., et al.: Language models are few-shot learners. In: Advances in Neural Information Processing Systems, vol. 33, pp. 1877\u20131901 (2020)"},{"key":"8_CR13","unstructured":"Touvron, H., et al.: LLaMA 2: open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)"},{"key":"8_CR14","unstructured":"Jiang, A.Q., et al.: Mistral 7B. arXiv preprint arXiv:2310.06825 (2023)"},{"key":"8_CR15","doi-asserted-by":"crossref","unstructured":"Gao, L., Ma, X., Lin, J., Callan, J.: Precise zero-shot dense retrieval without relevance labels. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 1762\u20131777 (2023)","DOI":"10.18653\/v1\/2023.acl-long.99"},{"key":"8_CR16","doi-asserted-by":"crossref","unstructured":"Wang, L., Yang, N., Wei, F.: Query2doc: query expansion with large language models. In: The 2023 Conference on Empirical Methods in Natural Language Processing (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.585"},{"key":"8_CR17","doi-asserted-by":"crossref","unstructured":"Craswell, N., Mitra, B., Yilmaz, E., Campos, D., Voorhees, E.M.: Overview of the TREC 2019 deep learning track. arXiv preprint arXiv:2003.07820 (2020)","DOI":"10.6028\/NIST.SP.1266.deep-overview"},{"key":"8_CR18","doi-asserted-by":"crossref","unstructured":"Craswell, N., Mitra, B., Yilmaz, E., Campos, D.: Overview of the TREC 2020 deep learning track. arXiv:2102.07662 (2021)","DOI":"10.6028\/NIST.SP.1266.deep-overview"},{"key":"8_CR19","unstructured":"Thakur, N., Reimers, N., R\u00fcckl\u00e9, A., Srivastava, A., Gurevych, I.: BEIR: a heterogeneous benchmark for zero-shot evaluation of information retrieval models. In: Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2) (2021)"},{"key":"8_CR20","unstructured":"Tunstall, L., et al.: Zephyr: direct distillation of LM alignment. arXiv preprint arXiv:2310.16944 (2023)"},{"key":"8_CR21","unstructured":"Devlin, J.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NAACL-HLT, vol.\u00a02019, p.\u00a04171 (2018)"},{"key":"8_CR22","unstructured":"Xiong, L., et al.: Approximate nearest neighbor negative contrastive learning for dense text retrieval. In: International Conference on Learning Representations (2020)"},{"key":"8_CR23","unstructured":"Sanh, V., et al.: Multitask prompted training enables zero-shot task generalization. In: International Conference on Learning Representations (2021)"},{"key":"8_CR24","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. In: Advances in Neural Information Processing Systems, vol. 35, pp. 27730\u201327744 (2022)"},{"issue":"11","key":"8_CR25","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1145\/219717.219748","volume":"38","author":"GA Miller","year":"1995","unstructured":"Miller, G.A.: WordNet: a lexical database for English. Commun. ACM 38(11), 39\u201341 (1995)","journal-title":"Commun. ACM"},{"key":"8_CR26","doi-asserted-by":"crossref","unstructured":"Formal, T., Piwowarski, B., Clinchant, S.: SPLADE: sparse lexical and expansion model for first stage ranking. In: Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2288\u20132292 (2021)","DOI":"10.1145\/3404835.3463098"},{"issue":"70","key":"8_CR27","first-page":"1","volume":"25","author":"HW Chung","year":"2024","unstructured":"Chung, H.W., et al.: Scaling instruction-finetuned language models. J. Mach. Learn. Res. 25(70), 1\u201353 (2024)","journal-title":"J. Mach. Learn. Res."},{"issue":"3","key":"8_CR28","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1109\/TBDATA.2019.2921572","volume":"7","author":"J Johnson","year":"2019","unstructured":"Johnson, J., Douze, M., J\u00e9gou, H.: Billion-scale similarity search with GPUs. IEEE Trans. Big Data 7(3), 535\u2013547 (2019)","journal-title":"IEEE Trans. Big Data"},{"key":"8_CR29","doi-asserted-by":"crossref","unstructured":"Lin, J., Ma, X., Lin, S.C., Yang, J.H., Pradeep, R., Nogueira, R.: Pyserini: a python toolkit for reproducible information retrieval research with sparse and dense representations. In: Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2356\u20132362 (2021)","DOI":"10.1145\/3404835.3463238"},{"key":"8_CR30","unstructured":"Zhang, P., Zeng, G., Wang, T., Lu, W.: TinyLlama: an open-source small language model. arXiv preprint arXiv:2401.02385 (2024)"}],"container-title":["Lecture Notes in Computer Science","Natural Language Processing and Chinese Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-9431-7_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T14:30:13Z","timestamp":1730385013000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-9431-7_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,1]]},"ISBN":["9789819794300","9789819794317"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-9431-7_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,1]]},"assertion":[{"value":"1 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NLPCC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CCF International Conference on Natural Language Processing and Chinese Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 November 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 November 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nlpcc2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/tcci.ccf.org.cn\/conference\/2024\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}