{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T00:26:02Z","timestamp":1759883162792,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032060686","type":"print"},{"value":"9783032060693","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T00:00:00Z","timestamp":1759881600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T00:00:00Z","timestamp":1759881600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-06069-3_1","type":"book-chapter","created":{"date-parts":[[2025,10,7]],"date-time":"2025-10-07T15:53:40Z","timestamp":1759852420000},"page":"3-13","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["ViSketch-GPT: Collaborative Multi-scale Feature Extraction For Hand-Drawn Sketch Retrieval"],"prefix":"10.1007","author":[{"given":"Giulio","family":"Federico","sequence":"first","affiliation":[]},{"given":"Fabio","family":"Carrara","sequence":"additional","affiliation":[]},{"given":"Claudio","family":"Gennaro","sequence":"additional","affiliation":[]},{"given":"Marco","family":"Di Benedetto","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,8]]},"reference":[{"key":"1_CR1","doi-asserted-by":"crossref","unstructured":"Bhunia, A.K., Chowdhury, P.N., Sain, A., Yang, Y., Xiang, T., Song, Y.Z.: More photos are all you need: Semi-supervised learning for fine-grained sketch based image retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4247\u20134256 (2021)","DOI":"10.1109\/CVPR46437.2021.00423"},{"key":"1_CR2","doi-asserted-by":"crossref","unstructured":"Bhunia, A.K., et al.: Sketching without worrying: noise-tolerant sketch-based image retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 999\u20131008 (2022)","DOI":"10.1109\/CVPR52688.2022.00107"},{"key":"1_CR3","doi-asserted-by":"crossref","unstructured":"Chaudhuri, A., Bhunia, A.K., Song, Y.Z., Dutta, A.: Data-free sketch-based image retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12084\u201312093 (June 2023)","DOI":"10.1109\/CVPR52729.2023.01163"},{"key":"1_CR4","unstructured":"Chaudhuri, A., Mancini, M., Chen, Y., Akata, Z., Dutta, A.: Cross-modal fusion distillation for fine-grained sketch-based image retrieval. arXiv preprint arXiv:2210.10486 (2022)"},{"key":"1_CR5","doi-asserted-by":"crossref","unstructured":"Chen, H., et al.: Data-free learning of student networks. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3514\u20133522 (2019)","DOI":"10.1109\/ICCV.2019.00361"},{"key":"1_CR6","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Burstein, J., Doran, C., Solorio, T. (eds.) Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers). pp. 4171\u20134186. Association for Computational Linguistics, Minneapolis, Minnesota (Jun 2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423, https:\/\/aclanthology.org\/N19-1423\/","DOI":"10.18653\/v1\/N19-1423"},{"key":"1_CR7","doi-asserted-by":"crossref","unstructured":"Dey, S., Riba, P., Dutta, A., Llados, J., Song, Y.Z.: Doodle to search: practical zero-shot sketch-based image retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2179\u20132188 (2019)","DOI":"10.1109\/CVPR.2019.00228"},{"issue":"10","key":"1_CR8","doi-asserted-by":"publisher","first-page":"2684","DOI":"10.1007\/s11263-020-01350-x","volume":"128","author":"A Dutta","year":"2020","unstructured":"Dutta, A., Akata, Z.: Semantically tied paired cycle consistency for any-shot sketch-based image retrieval. Int. J. Comput. Vision 128(10), 2684\u20132703 (2020)","journal-title":"Int. J. Comput. Vision"},{"key":"1_CR9","doi-asserted-by":"crossref","unstructured":"Eitz, M., Hays, J., Alexa, M.: How do humans sketch objects? ACM Trans. Graph. (Proc. SIGGRAPH) 31(4), 44:1\u201344:10 (2012)","DOI":"10.1145\/2185520.2335395"},{"key":"1_CR10","doi-asserted-by":"crossref","unstructured":"Federico, G., Amato, G., Carrara, F., Gennaro, C., Di Benedetto, M.: Visketch-gpt: Collaborative multi-scale feature extraction for sketch recognition and generation (2025). https:\/\/arxiv.org\/abs\/2503.22374","DOI":"10.2139\/ssrn.5192605"},{"key":"1_CR11","doi-asserted-by":"crossref","unstructured":"Gatti, P., Parikh, K., Paul, D.P., Gupta, M., Mishra, A.: Composite sketch+ text queries for retrieving objects with elusive names and complex interactions. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 1869\u20131877 (2024)","DOI":"10.1609\/aaai.v38i3.27956"},{"key":"1_CR12","unstructured":"Ha, D., Eck, D.: A neural representation of sketch drawings. In: International Conference on Learning Representations (2018). https:\/\/openreview.net\/forum?id=Hy6GHpkCW"},{"key":"1_CR13","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"8","key":"1_CR14","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"1_CR15","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Van Der\u00a0Maaten, L., Weinberger, K.Q.: Densely connected convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4700\u20134708 (2017)","DOI":"10.1109\/CVPR.2017.243"},{"key":"1_CR16","doi-asserted-by":"crossref","unstructured":"Jiang, J., Tang, H., Jiang, Z., Yu, W., Wu, D.: Arnet: self-supervised fg-sbir with unified sample feature alignment and multi-scale token recycling. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a039, pp. 3985\u20133993 (2025)","DOI":"10.1609\/aaai.v39i4.32417"},{"key":"1_CR17","unstructured":"Jongejan, J., Rowley, H., Kawashima, T., Kim, J., Fox-Gieg, N.: The Quick, Draw! - A.I. Experiment (2016). https:\/\/quickdraw.withgoogle.com\/, Accessed 8 May 2025"},{"key":"1_CR18","doi-asserted-by":"crossref","unstructured":"Lin, F., Li, M., Li, D., Hospedales, T., Song, Y.Z., Qi, Y.: Zero-shot everything sketch-based image retrieval, and in explainable style. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and pattern recognition, pp. 23349\u201323358 (2023)","DOI":"10.1109\/CVPR52729.2023.02236"},{"key":"1_CR19","doi-asserted-by":"crossref","unstructured":"Lin, H., Fu, Y., Lu, P., Gong, S., Xue, X., Jiang, Y.G.: Tc-net for isbir: triplet classification network for instance-level sketch based image retrieval. In: Proceedings of the 27th ACM International Conference on Multimedia, pp. 1676\u20131684 (2019)","DOI":"10.1145\/3343031.3350900"},{"key":"1_CR20","doi-asserted-by":"crossref","unstructured":"Lin, H., Fu, Y., Xue, X., Jiang, Y.G.: Sketch-bert: learning sketch bidirectional encoder representation from transformers by self-supervised learning of sketch gestalt. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (June 2020)","DOI":"10.1109\/CVPR42600.2020.00679"},{"key":"1_CR21","doi-asserted-by":"crossref","unstructured":"Lyou, E., Lee, D., Kim, J., Lee, J.: Modality-aware representation learning for zero-shot sketch-based image retrieval. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 5646\u20135655 (2024)","DOI":"10.1109\/WACV57701.2024.00555"},{"key":"1_CR22","doi-asserted-by":"crossref","unstructured":"Song, J., Yu, Q., Song, Y.Z., Xiang, T., Hospedales, T.M.: Deep spatial-semantic attention for fine-grained sketch-based image retrieval. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5551\u20135560 (2017)","DOI":"10.1109\/ICCV.2017.592"},{"key":"1_CR23","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inform. Process. Syst. 30 (2017)"},{"key":"1_CR24","doi-asserted-by":"crossref","unstructured":"Yu, Q., Yang, Y., Song, Y.Z., Xiang, T., Hospedales, T.M.: Sketch-a-net that beats humans. In: British Machine Vision Conference (2015). https:\/\/api.semanticscholar.org\/CorpusID:15004083","DOI":"10.5244\/C.29.7"}],"container-title":["Lecture Notes in Computer Science","Similarity Search and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-06069-3_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,7]],"date-time":"2025-10-07T15:53:48Z","timestamp":1759852428000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-06069-3_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,8]]},"ISBN":["9783032060686","9783032060693"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-06069-3_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,8]]},"assertion":[{"value":"8 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that\u00a0are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"SISAP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Similarity Search and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Reykjavik","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Iceland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"sisap2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.sisap.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}