{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T20:15:56Z","timestamp":1771704956160,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T00:00:00Z","timestamp":1720569600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62222203,62072080"],"award-info":[{"award-number":["62222203,62072080"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Technology Innovation Committee of Shenzhen Municipality Foundation","award":["JCYJ20210324132203007"],"award-info":[{"award-number":["JCYJ20210324132203007"]}]},{"name":"New Cornerstone Science Foundation through the XPLORER PRIZE"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,10]]},"DOI":"10.1145\/3626772.3657826","type":"proceedings-article","created":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T12:40:05Z","timestamp":1720701605000},"page":"197-207","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Unsupervised Cross-Domain Image Retrieval with Semantic-Attended Mixture-of-Experts"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-0468-5034","authenticated-orcid":false,"given":"Kai","family":"Wang","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4257-096X","authenticated-orcid":false,"given":"Jiayang","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5685-3123","authenticated-orcid":false,"given":"Xing","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2549-8322","authenticated-orcid":false,"given":"Jingkuan","family":"Song","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0011-6260","authenticated-orcid":false,"given":"Xin","family":"Liu","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Huaqiao University, Xiamen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2999-2088","authenticated-orcid":false,"given":"Heng Tao","family":"Shen","sequence":"additional","affiliation":[{"name":"College of Electronic and Information Engineering, Tongji University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2024,7,11]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Social Networks and Texts: 7th International Conference, AIST 2018","author":"Andreeva Elena","year":"2018","unstructured":"Elena Andreeva, Dmitry I Ignatov, Artem Grachev, and Andrey V Savchenko. 2018. Extraction of visual features for recommendation of products via deep learning. In Analysis of Images, Social Networks and Texts: 7th International Conference, AIST 2018, Moscow, Russia, July 5--7, 2018, Revised Selected Papers 7. Springer, 201--210."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/6046.865481"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"e_1_3_2_1_4_1","volume-title":"International Journal of Computer Vision","author":"Chen Xiaokang","year":"2023","unstructured":"Xiaokang Chen, Mingyu Ding, Xiaodi Wang, Ying Xin, Shentong Mo, Yunhao Wang, Shumin Han, Ping Luo, Gang Zeng, and Jingdong Wang. 2023. Context autoencoder for self-supervised representation learning. International Journal of Computer Vision (2023), 1--16."},{"key":"e_1_3_2_1_5_1","first-page":"34600","article-title":"On the representation collapse of sparse mixture of experts","volume":"35","author":"Chi Zewen","year":"2022","unstructured":"Zewen Chi, Li Dong, Shaohan Huang, Damai Dai, Shuming Ma, Barun Patra, Saksham Singhal, Payal Bajaj, Xia Song, Xian-Ling Mao, et al. 2022. On the representation collapse of sparse mixture of experts. Advances in Neural Information Processing Systems , Vol. 35 (2022), 34600--34613.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/1348246.1348248"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-007-9039-3"},{"key":"e_1_3_2_1_8_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.5555\/3586589.3586709"},{"key":"e_1_3_2_1_10_1","volume-title":"Advances in Neural Information Processing Systems 20. Curran Associates","author":"Ferrari Vittorio","unstructured":"Vittorio Ferrari and Andrew Zisserman. 2007. Learning Visual Attributes. In Advances in Neural Information Processing Systems 20. Curran Associates, Inc., 433--440."},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings, Part VI 14","author":"Gordo Albert","year":"2016","unstructured":"Albert Gordo, Jon Almaz\u00e1n, Jerome Revaud, and Diane Larlus. 2016. Deep image retrieval: Learning global representations for image search. In Computer Vision--ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11--14, 2016, Proceedings, Part VI 14. Springer, 241--257."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/2.410145"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19836-6_30"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01011"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.127"},{"key":"e_1_3_2_1_17_1","volume-title":"Adaptive mixtures of local experts. Neural computation","author":"Jacobs Robert A","year":"1991","unstructured":"Robert A Jacobs, Michael I Jordan, Steven J Nowlan, and Geoffrey E Hinton. 1991. Adaptive mixtures of local experts. Neural computation, Vol. 3, 1 (1991), 79--87."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/0031-3203(95)00160-3"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123429"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548309"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3211850"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00250"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/1013208.1013210"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00899"},{"key":"e_1_3_2_1_25_1","volume-title":"Gshard: Scaling giant models with conditional computation and automatic sharding. arXiv preprint arXiv:2006.16668","author":"Lepikhin Dmitry","year":"2020","unstructured":"Dmitry Lepikhin, HyoukJoong Lee, Yuanzhong Xu, Dehao Chen, Orhan Firat, Yanping Huang, Maxim Krikun, Noam Shazeer, and Zhifeng Chen. 2020. Gshard: Scaling giant models with conditional computation and automatic sharding. arXiv preprint arXiv:2006.16668 (2020)."},{"key":"e_1_3_2_1_26_1","volume-title":"International Conference on Machine Learning. PMLR, 6265--6274","author":"Lewis Mike","year":"2021","unstructured":"Mike Lewis, Shruti Bhosale, Tim Dettmers, Naman Goyal, and Luke Zettlemoyer. 2021. Base layers: Simplifying training of large, sparse models. In International Conference on Machine Learning. PMLR, 6265--6274."},{"key":"e_1_3_2_1_27_1","volume-title":"Sparse mixture-of-experts are domain generalizable learners. arXiv preprint arXiv:2206.04046","author":"Li Bo","year":"2022","unstructured":"Bo Li, Yifei Shen, Jingkang Yang, Yezhen Wang, Jiawei Ren, Tong Che, Jun Zhang, and Ziwei Liu. 2022. Sparse mixture-of-experts are domain generalizable learners. arXiv preprint arXiv:2206.04046 (2022)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Dongge Li Nevenka Dimitrova Mingkun Li and Ishwar K Sethi. 2003. Multimedia content processing through cross-modal association. In ACM MM. 604--611.","DOI":"10.1145\/957013.957143"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3306738"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612244"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01471"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Kaiyue Pang Yi-Zhe Song Tony Xiang and Timothy M Hospedales. 2017. Cross-domain Generative Learning for Fine-Grained Sketch-Based Image Retrieval.. In BMVC. 1--12.","DOI":"10.5244\/C.31.46"},{"key":"e_1_3_2_1_33_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems , Vol. 32 (2019)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01184"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00149"},{"key":"e_1_3_2_1_36_1","volume-title":"Emanuele Coviello, Gabriel Doyle, Gert RG Lanckriet, Roger Levy, and Nuno Vasconcelos.","author":"Rasiwasia Nikhil","year":"2010","unstructured":"Nikhil Rasiwasia, Jose Costa Pereira, Emanuele Coviello, Gabriel Doyle, Gert RG Lanckriet, Roger Levy, and Nuno Vasconcelos. 2010. A new approach to cross-modal multimedia retrieval. In ACM MM. 251--260."},{"key":"e_1_3_2_1_37_1","first-page":"8583","article-title":"Scaling vision with sparse mixture of experts","volume":"34","author":"Riquelme Carlos","year":"2021","unstructured":"Carlos Riquelme, Joan Puigcerver, Basil Mustafa, Maxim Neumann, Rodolphe Jenatton, Andr\u00e9 Susano Pinto, Daniel Keysers, and Neil Houlsby. 2021. Scaling vision with sparse mixture of experts. Advances in Neural Information Processing Systems , Vol. 34 (2021), 8583--8595.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2000.855825"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1006\/jvci.1999.0413"},{"key":"e_1_3_2_1_40_1","volume-title":"Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. arXiv preprint arXiv:1701.06538","author":"Shazeer Noam","year":"2017","unstructured":"Noam Shazeer, Azalia Mirhoseini, Krzysztof Maziarz, Andy Davis, Quoc Le, Geoffrey Hinton, and Jeff Dean. 2017. Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. arXiv preprint arXiv:1701.06538 (2017)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/34.895972"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.592"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3532028"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000004830.93820.78"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.572"},{"key":"e_1_3_2_1_46_1","volume-title":"Joint feature selection and subspace learning for cross-modal retrieval","author":"Wang Kaiye","year":"2015","unstructured":"Kaiye Wang, Ran He, Liang Wang, Wei Wang, and Tieniu Tan. 2015. Joint feature selection and subspace learning for cross-modal retrieval. IEEE transactions on pattern analysis and machine intelligence, Vol. 38, 10 (2015), 2010--2023."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548382"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2016.04.046"},{"key":"e_1_3_2_1_49_1","volume-title":"Correspondence-Free Domain Alignment for Unsupervised Cross-Domain Image Retrieval. In Thirty-Seventh AAAI Conference on Artificial Intelligence. 10200--10208","author":"Wang Xu","year":"2023","unstructured":"Xu Wang, Dezhong Peng, Ming Yan, and Peng Hu. 2023. Correspondence-Free Domain Alignment for Unsupervised Cross-Domain Image Retrieval. In Thirty-Seventh AAAI Conference on Artificial Intelligence. 10200--10208."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2949697"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1352"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.93"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01362"},{"key":"e_1_3_2_1_54_1","volume-title":"Aude Oliva, and Antonio Torralba.","author":"Zhou Bolei","year":"2015","unstructured":"Bolei Zhou, Aditya Khosla, \u00c0 gata Lapedriza, Aude Oliva, and Antonio Torralba. 2015. Object Detectors Emerge in Deep Scene CNNs. In 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7--9, 2015, Conference Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.)."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1007\/s13735-022-00244-7"}],"event":{"name":"SIGIR 2024: The 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","location":"Washington DC USA","acronym":"SIGIR 2024","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657826","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626772.3657826","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:37:15Z","timestamp":1755841035000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657826"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":55,"alternative-id":["10.1145\/3626772.3657826","10.1145\/3626772"],"URL":"https:\/\/doi.org\/10.1145\/3626772.3657826","relation":{},"subject":[],"published":{"date-parts":[[2024,7,10]]},"assertion":[{"value":"2024-07-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}