{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T06:56:24Z","timestamp":1764053784287,"version":"3.40.3"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030997359"},{"type":"electronic","value":"9783030997366"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-030-99736-6_20","type":"book-chapter","created":{"date-parts":[[2022,4,4]],"date-time":"2022-04-04T19:02:47Z","timestamp":1649098967000},"page":"289-303","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Extending CLIP for Category-to-Image Retrieval in E-Commerce"],"prefix":"10.1007","author":[{"given":"Mariya","family":"Hendriksen","sequence":"first","affiliation":[]},{"given":"Maurits","family":"Bleeker","sequence":"additional","affiliation":[]},{"given":"Svitlana","family":"Vakulenko","sequence":"additional","affiliation":[]},{"given":"Nanne","family":"van Noord","sequence":"additional","affiliation":[]},{"given":"Ernst","family":"Kuiper","sequence":"additional","affiliation":[]},{"given":"Maarten","family":"de Rijke","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,4,5]]},"reference":[{"key":"20_CR1","unstructured":"Ba, J.L., Kiros, J.R., Hinton, G.E.: Layer normalization arXiv preprint arXiv:160706450 (2016)"},{"key":"20_CR2","doi-asserted-by":"crossref","unstructured":"Bonab, H., Aliannejadi, M., Vardasbi, A., Kanoulas, E., Allan, J.: XMarket: cross-market training for product recommendation. In: CIKM, ACM (2021)","DOI":"10.1145\/3459637.3482493"},{"key":"20_CR3","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning, PMLR, pp. 1597\u20131607 (2020)"},{"key":"20_CR4","unstructured":"Dai, Z., Lai, G., Yang, Y., Le, Q.V.: Funnel-transformer: Filtering out sequential redundancy for efficient language processing. arXiv preprint arXiv:200603236 (2020)"},{"key":"20_CR5","unstructured":"Dosovitskiy, A.: An image is worth $$16\\, \\times \\,16$$ words: transformers for image recognition at scale. In: International Conference on Learning Representations (2021)"},{"key":"20_CR6","unstructured":"Goei, K., Hendriksen, M., de Rijke, M.: Tackling attribute fine-grainedness in cross-modal fashion search with multi-level features. In: SIGIR 2021 Workshop on eCommerce. ACM (2021)"},{"key":"20_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"752","DOI":"10.1007\/978-3-030-58580-8_44","volume-title":"Computer Vision \u2013 ECCV 2020","author":"T Gupta","year":"2020","unstructured":"Gupta, T., Vahdat, A., Chechik, G., Yang, X., Kautz, J., Hoiem, D.: Contrastive learning for weakly supervised phrase grounding. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020, Part III. LNCS, vol. 12348, pp. 752\u2013768. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58580-8_44"},{"key":"20_CR8","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R: Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9729\u20139738 (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"20_CR9","unstructured":"Hendrycks, D., Gimpel, K.: Gaussian error linear units (GELUs). arXiv preprint arXiv:160608415 (2016)"},{"key":"20_CR10","doi-asserted-by":"crossref","unstructured":"Hewawalpita, S., Perera, I.: Multimodal user interaction framework for e-commerce. In: 2019 International Research Conference on Smart Computing and Systems Engineering (SCSE), pp 9\u201316. IEEE (2019)","DOI":"10.23919\/SCSE.2019.8842815"},{"key":"20_CR11","doi-asserted-by":"crossref","unstructured":"Hu, R., Xu, H., Rohrbach, M., Feng, J., Saenko, K., Darrell, T.: Natural language object retrieval. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 4555\u20134564 (2016)","DOI":"10.1109\/CVPR.2016.493"},{"issue":"6","key":"20_CR12","doi-asserted-by":"publisher","first-page":"809","DOI":"10.1016\/S0306-4573(00)00016-9","volume":"36","author":"KS Jones","year":"2000","unstructured":"Jones, K.S., Walker, S., Robertson, S.E.: A probabilistic model of information retrieval: development and comparative experiments: Part 2. Inf. Process. Manage. 36(6), 809\u2013840 (2000)","journal-title":"Inf. Process. Manage."},{"key":"20_CR13","unstructured":"Kondylidis, N., Zou, J., Kanoulas, E.: Category aware explainable conversational recommendation. arXiv preprint arXiv:210308733 (2021)"},{"key":"20_CR14","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-15436-3_4","volume-title":"Fashion Communication in the Digital Age","author":"K Laenen","year":"2019","unstructured":"Laenen, K., Moens, M.F.: Multimodal neural machine translation of fashion E-commerce descriptions. In: Kalbaska, N., S\u00e1daba, T., Cominelli, F., Cantoni, L. (eds.) FACTUM 2019. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-15436-3_4"},{"issue":"6","key":"20_CR15","doi-asserted-by":"publisher","first-page":"102316","DOI":"10.1016\/j.ipm.2020.102316","volume":"57","author":"K Laenen","year":"2020","unstructured":"Laenen, K., Moens, M.F.: A comparative study of outfit recommendation methods with a focus on attention-based fusion. Inf. Process. Manage. 57(6), 102316 (2020)","journal-title":"Inf. Process. Manage."},{"key":"20_CR16","unstructured":"Laenen, K., Zoghbi, S., Moens, M.F.: Cross-modal search for fashion attributes. In: Proceedings of the KDD 2017 Workshop on Machine Learning Meets Fashion, vol. 2017, pp 1\u201310, ACM (2017)"},{"key":"20_CR17","doi-asserted-by":"crossref","unstructured":"Laenen, K., Zoghbi, S., Moens, M.F.: Web search of fashion items with multimodal querying. In: Proceedings of the Eleventh ACM International Conference on Web Search and Data Mining, pp. 342\u2013350 (2018)","DOI":"10.1145\/3159652.3159716"},{"key":"20_CR18","doi-asserted-by":"crossref","unstructured":"Li, H., Yuan, P., Xu, S., Wu, Y., He, X., Zhou, B.: Aspect-aware multimodal summarization for Chinese e-commerce products. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 8188\u20138195 (2020)","DOI":"10.1609\/aaai.v34i05.6332"},{"key":"20_CR19","doi-asserted-by":"crossref","unstructured":"Li, X., Wang, X., He, X., Chen, L., Xiao, J., Chua, T.S.: Hierarchical fashion graph network for personalized outfit recommendation. In: Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, pp 159\u2013168 (2020)","DOI":"10.1145\/3397271.3401080"},{"key":"20_CR20","doi-asserted-by":"crossref","unstructured":"Liao, L., He, X., Zhao, B., Ngo, C.W., Chua, T.S.: Interpretable multimodal retrieval for fashion products. In: Proceedings of the 26th ACM International Conference on Multimedia, pp 1571\u20131579 (2018)","DOI":"10.1145\/3240508.3240646"},{"key":"20_CR21","doi-asserted-by":"crossref","unstructured":"Lin, Y., Ren, P., Chen, Z., Ren, Z., Ma, J., de Rijke, M.: Improving outfit recommendation with co-supervision of fashion generation. In: The World Wide Web Conference, pp. 1095\u20131105 (2019)","DOI":"10.1145\/3308558.3313614"},{"key":"20_CR22","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:171105101 (2017)"},{"key":"20_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1007\/978-3-030-01246-5_11","volume-title":"Computer Vision \u2013 ECCV 2018","author":"T Nagarajan","year":"2018","unstructured":"Nagarajan, T., Grauman, K.: Attributes as operators: factorizing unseen attribute-object compositions. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018, Part I. LNCS, vol. 11205, pp. 172\u2013190. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01246-5_11"},{"key":"20_CR24","unstructured":"Nielsen, J., Molich, R., Snyder, C., Farrell, S.: E-commerce user experience. Nielsen Norman Group (2000)"},{"key":"20_CR25","unstructured":"Avd, O., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. arXiv preprint arXiv:180703748 (2018)"},{"key":"20_CR26","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. arXiv preprint arXiv:210300020 (2021)"},{"key":"20_CR27","unstructured":"Shen, S., et al.: How much can CLIP benefit vision-and-language tasks? arXiv preprint arXiv:210706383 (2021)"},{"issue":"12","key":"20_CR28","doi-asserted-by":"publisher","first-page":"1349","DOI":"10.1007\/978-3-540-74769-7_81","volume":"22","author":"A Smeulders","year":"2000","unstructured":"Smeulders, A., Worring, M., Santini, S., Gupta, A., Jain, R.: Content-based image retrieval at the end of the early years. IEEE Trans. Pattern Anal. Mach. Intell. 22(12), 1349\u20131380 (2000). https:\/\/doi.org\/10.1007\/978-3-540-74769-7_81","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"20_CR29","unstructured":"Song, K., Tan, X., Qin, T., Lu, J., Liu, T.Y.: MPNet: Masked and permuted pre-training for language understanding. arXiv preprint arXiv:200409297 (2020)"},{"key":"20_CR30","doi-asserted-by":"crossref","unstructured":"Tagliabue, J., Yu, B., Beaulieu, M.: How to grow a (product) tree: personalized category suggestions for ecommerce type-ahead. arXiv preprint arXiv:200512781 (2020)","DOI":"10.18653\/v1\/2020.ecnlp-1.2"},{"key":"20_CR31","doi-asserted-by":"publisher","first-page":"84613","DOI":"10.1109\/ACCESS.2019.2923552","volume":"7","author":"I Tautkute","year":"2019","unstructured":"Tautkute, I., Trzci\u0144ski, T., Skorupa, A.P., Brocki, \u0141, Marasek, K.: DeepStyle: multimodal search engine for fashion and interior design. IEEE Access 7, 84613\u201384628 (2019)","journal-title":"IEEE Access"},{"issue":"2","key":"20_CR32","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1145\/2812802","volume":"59","author":"B Thomee","year":"2016","unstructured":"Thomee, B., et al.: YFCC100M: the new data in multimedia research. Commun. ACM 59(2), 64\u201373 (2016)","journal-title":"Commun. ACM"},{"key":"20_CR33","doi-asserted-by":"crossref","unstructured":"Tsagkias, M., King,T.H., Kallumadi, S., Murdock, V., de Rijke, M.: Challenges and research opportunities in ecommerce search and recommendations. In: SIGIR Forum, vol. 54, issue number 1 (2020)","DOI":"10.1145\/3451964.3451966"},{"key":"20_CR34","doi-asserted-by":"crossref","unstructured":"Vo, N., et al.: Composing text and image for image retrieval-an empirical odyssey. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6439\u20136448 (2019)","DOI":"10.1109\/CVPR.2019.00660"},{"key":"20_CR35","doi-asserted-by":"crossref","unstructured":"Wang, S., Zhuang, S., Zuccon, G.: Bert-based dense retrievers require interpolation with BM25 for effective passage retrieval. In: Proceedings of the 2021 ACM SIGIR International Conference on Theory of Information Retrieval, pp. 317\u2013324 (2021)","DOI":"10.1145\/3471158.3472233"},{"key":"20_CR36","unstructured":"Wirojwatanakul, P., Wangperawong, A.: Multi-label product categorization using multi-modal fusion models. arXiv preprint arXiv:190700420 (2019)"},{"key":"20_CR37","unstructured":"Yamaura, Y., Kanemaki, N., Tsuboshita, Y.: The resale price prediction of secondhand jewelry items using a multi-modal deep model with iterative co-attention. arXiv preprint arXiv:190700661 (2019)"},{"key":"20_CR38","doi-asserted-by":"crossref","unstructured":"Yang, X., et al.: Interpretable fashion matching with rich attributes. In: Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 775\u2013784 (2019)","DOI":"10.1145\/3331184.3331242"},{"key":"20_CR39","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1007\/978-3-319-54193-8_6","volume-title":"Computer Vision \u2013 ACCV 2016","author":"T Yashima","year":"2017","unstructured":"Yashima, T., Okazaki, N., Inui, K., Yamaguchi, K., Okatani, T.: Learning to describe e-commerce images from noisy online data. In: Lai, S.-H., Lepetit, V., Nishino, K., Sato, Y. (eds.) ACCV 2016, Part V. LNCS, vol. 10115, pp. 85\u2013100. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-54193-8_6"},{"key":"20_CR40","unstructured":"Yim, J., Kim, J.J., Shin, D.: One-shot item search with multimodal data. arXiv preprint arXiv:181110969 (2018)"},{"key":"20_CR41","unstructured":"Zhang, Y., Jiang, H., Miura, Y., Manning, C.D., Langlotz, C.P.: Contrastive learning of medical visual representations from paired images and text. arXiv preprint arXiv:201000747 (2020)"},{"key":"20_CR42","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1007\/978-3-319-27674-8_35","volume-title":"MultiMedia Modeling","author":"S Zoghbi","year":"2016","unstructured":"Zoghbi, S., Heyman, G., Gomez, J.C., Moens, M.-F.: Cross-modal fashion search. In: Tian, Q., Sebe, N., Qi, G.-J., Huet, B., Hong, R., Liu, X. (eds.) MMM 2016, Part II. LNCS, vol. 9517, pp. 367\u2013373. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-27674-8_35"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-99736-6_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T08:56:52Z","timestamp":1710233812000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-99736-6_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783030997359","9783030997366"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-99736-6_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"5 April 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECIR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Information Retrieval","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Stavanger","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Norway","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 April 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 April 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"44","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecir2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecir2022.org","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"395","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"35","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"29","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"9% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4-6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Additionally, there are other papers: 11 reproducibility, 12 doctoral, 13 CLEF Labs, 5 workshops and 4 tutorials.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}