{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:16:31Z","timestamp":1750220191852,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,9,23]],"date-time":"2022-09-23T00:00:00Z","timestamp":1663891200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,9,23]]},"DOI":"10.1145\/3573942.3574088","type":"proceedings-article","created":{"date-parts":[[2023,5,16]],"date-time":"2023-05-16T23:45:42Z","timestamp":1684280742000},"page":"734-739","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["First Describe, Then Depict: Generating Covers for Music and Books via Extracting Keywords"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5309-2207","authenticated-orcid":false,"given":"Valeria","family":"Efimova","sequence":"first","affiliation":[{"name":"ML Lab, ITMO University, Russia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5647-6521","authenticated-orcid":false,"given":"Viacheslav","family":"Shalamov","sequence":"additional","affiliation":[{"name":"ML Lab, ITMO University, Russia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1133-8432","authenticated-orcid":false,"given":"Andrey","family":"Filchenkov","sequence":"additional","affiliation":[{"name":"ML Lab, ITMO University, Russia"}]}],"member":"320","published-online":{"date-parts":[[2023,5,16]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"https:\/\/www.artbreeder.com\/compose\/albums","author":"Artbreeder","year":"2020","unstructured":"Artbreeder. 2021. https:\/\/www.artbreeder.com\/compose\/albums, 2020. Retrieved 2021-09-15."},{"key":"e_1_3_2_1_2_1","volume-title":"https:\/\/automated-art.co.uk\/","author":"Automated","year":"2021","unstructured":"Automated art. 2021. https:\/\/automated-art.co.uk\/, 2021. Retrieved 2021-09-15."},{"key":"e_1_3_2_1_3_1","volume-title":"A., and Goyal, N., Zettle-moyer, L.","author":"Aghajanyan A.","year":"2020","unstructured":"Aghajanyan, A., and Shrivastava, A., and Gupta, A., and Goyal, N., Zettle-moyer, L., and Gupta, S. 2020. Better fine-tuning by reducing representational collapse. arXiv preprint arXiv:2008.03156 (2020)."},{"key":"e_1_3_2_1_4_1","volume-title":"S.","author":"Beliga S.","year":"2015","unstructured":"Beliga, S., and Mestrovic, A., and MartincicIpsic, S. 2015. An overview of graph-based keyword extraction methods and approaches. Journal of information and organizational sciences 39, 1 (2015), 1\u201320."},{"key":"e_1_3_2_1_5_1","volume-title":"A., and Baeriswyl, M.","author":"Bennani-Smires K.","year":"2018","unstructured":"Bennani-Smires, K., and Musat, C., and Hossmann, A., and Baeriswyl, M., and Jaggi, M. 2018. Simple unsupervised keyphrase extraction using sentence embeddings. arXiv preprint arXiv:1801.04470 (2018)."},{"key":"e_1_3_2_1_6_1","volume-title":"L., and Liu, L., and Ling, Z., and Li, W.","author":"Cong W.","year":"2019","unstructured":"Cong, W., and Zhang, J., and Niu, L., and Liu, L., and Ling, Z., and Li, W., and Zhang, L. 2019. Image harmonization dataset iharmony4: Hcoco, hadobe5k, hflickr, and hday2night. arXiv preprint arXiv:1908.10526 (2019)"},{"key":"e_1_3_2_1_7_1","first-page":"8403","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Cong W.","year":"2020","unstructured":"Cong, W., and Zhang, J., and Niu, L., and Liu, L., and Ling, Z., and Li, W., and Zhang, L. 2020. Dovenet: Deep image harmonization via domain verification. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2020), pp. 8394\u20138403."},{"key":"e_1_3_2_1_8_1","volume-title":"F., and Royo-Letelier, J.","author":"Delbouys R.","year":"2018","unstructured":"Delbouys, R., and Hennequin, R., and Piccoli, F., and Royo-Letelier, J., and Moussallam, M. 2018. Music mood detection based on audio and lyrics with deep neural net. arXiv preprint arXiv:1809.07276 (2018)."},{"key":"e_1_3_2_1_9_1","volume-title":"K.","author":"Devlin J.","year":"2018","unstructured":"Devlin, J., and Chang, M.-W., and Lee, K., and Toutanova, K. 2018. Bert:Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_10_1","unstructured":"Forte M. and Pitie F. 2020. f b alpha matting. arXiv preprint arXiv:2003.07711 (2020)"},{"key":"e_1_3_2_1_11_1","volume-title":"F., and Hees, J.","author":"Frolov S.","year":"2021","unstructured":"Frolov, S., and Hinz, T., and Raue, F., and Hees, J., and Dengel, A. 2021. Adversarial text-to-image synthesis: A review. arXiv preprint arXiv:2101.09983 (2021)."},{"key":"e_1_3_2_1_12_1","volume-title":"E., and Shen, X., and Gambaretto, E., and Gagne, C., and Lalonde, J.-F.","author":"Gardner M.-A.","year":"2017","unstructured":"Gardner, M.-A., and Sunkavalli, K., and Yumer, E., and Shen, X., and Gambaretto, E., and Gagne, C., and Lalonde, J.-F. 2017. Learning to predict indoor illumination from a single image. arXiv preprint arXiv:1704.00090 (2017)."},{"key":"e_1_3_2_1_13_1","volume-title":"M.","author":"Gatys L. A.","year":"2015","unstructured":"Gatys, L. A., and Ecker, A. S., and Bethge, M. 2015. A neural algorithm of artistic style. arXiv preprint arXiv:1508.06576 (2015)."},{"key":"e_1_3_2_1_14_1","volume-title":"Rocklou album cover generator. https:\/\/www.rocklou.com\/albumcovergenerator","author":"Gavelin D.","year":"2019","unstructured":"Gavelin, D. 2019. Rocklou album cover generator. https:\/\/www.rocklou.com\/albumcovergenerator, 2019. Retrieved 2021-09-15."},{"key":"e_1_3_2_1_15_1","first-page":"1014","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","author":"Gupta K.","year":"2021","unstructured":"Gupta, K., and Lazarow, J., and Achille, A., and Davis, L. S., and Mahadevan, V., and Shrivastava, A. 2021. Layouttransformer: Layout generation and completion with self-attention. In Proceedings of the IEEE\/CVF International Conference on Computer Vision (2021), pp. 1004\u20131014."},{"key":"e_1_3_2_1_16_1","first-page":"1034","volume-title":"Proceedings of the IEEE international conference on computer vision","author":"He K.","year":"2015","unstructured":"He, K., and Zhang, X., and Ren, S., and Sun, J. 2015. Delving deep into rectifiers: Surpassing human-level performance on imagenet classification. In Proceedings of the IEEE international conference on computer vision (2015), pp. 1026\u20131034."},{"volume-title":"10th International Workshop on Machine Learning and Music (2017)","author":"Hepburn A.","key":"e_1_3_2_1_17_1","unstructured":"Hepburn, A., and McConville, R., and Santos-Rodr\u0131guez, R. 2017. Album cover generation from genre tags. In 10th International Workshop on Machine Learning and Music (2017)"},{"key":"e_1_3_2_1_18_1","first-page":"47","article-title":"and Chan, W., and Fleet, D. J., and Norouzi, M., and Salimans, T. 2022. Cascaded diffusion models for high fidelity image generation","volume":"23","author":"Ho J.","year":"2022","unstructured":"Ho, J., and Saharia, C., and Chan, W., and Fleet, D. J., and Norouzi, M., and Salimans, T. 2022. Cascaded diffusion models for high fidelity image generation. J. Mach. Learn. Res. 23 (2022), 47\u20131.","journal-title":"J. Mach. Learn. Res."},{"key":"e_1_3_2_1_19_1","first-page":"7321","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"Hold-Geoffroy Y.","year":"2017","unstructured":"Hold-Geoffroy, Y., and Sunkavalli, K., and Hadap, S., and Gambaretto, E., and Lalonde, J.-F. 2017. Deep outdoor illumination estimation. In Proceedings of the IEEE conference on computer vision and pattern recognition (2017), pp. 7312\u20137321."},{"key":"e_1_3_2_1_20_1","volume-title":"K., and Yan, Q.","author":"Ke Z.","year":"2020","unstructured":"Ke, Z., and Sun, J., and Li, K., and Yan, Q., and Lau, R. W. 2020. Modnet: real-time trimap-free portrait matting via objective decomposition. arXiv e-prints (2020)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2007.4409107"},{"key":"e_1_3_2_1_22_1","first-page":"3","article-title":"and Siddiqui, S. A., and Rizvi, S. T. R., and Iwana, B. K., and Uchida, S., and Dengel, A., and Ahmed, S. 2020. Benchmarking deep learning models for classification of book covers","volume":"1","author":"Lucieri A.","year":"2020","unstructured":"Lucieri, A., and Sabir, H., and Siddiqui, S. A., and Rizvi, S. T. R., and Iwana, B. K., and Uchida, S., and Dengel, A., and Ahmed, S. 2020. Benchmarking deep learning models for classification of book covers. SN Computer Science 1, 3 (2020), 1\u201316.","journal-title":"SN Computer Science"},{"key":"e_1_3_2_1_23_1","first-page":"24","volume-title":"D., and Ellis, D., and Mcvicar, M., and Battenberg, E.","author":"McFee B.","year":"2020","unstructured":"McFee, B., and Raffel, C., and Liang, D., and Ellis, D., and Mcvicar, M., and Battenberg, E., and Nieto, O. 2020. librosa: Audio and music signal analysis in python. pp. 18\u201324."},{"key":"e_1_3_2_1_24_1","unstructured":"Mirza M. and Osindero S. 2014. Conditional generative adversarial nets. arXiv preprint arXiv:1411.1784 (2014)."},{"key":"e_1_3_2_1_25_1","first-page":"8763","volume-title":"C., and Ramesh, A., and Goh, G., and Agarwal, S., and Sastry, G., and Askell, A., and Mishkin, P.","author":"Radford A.","year":"2021","unstructured":"Radford, A., and Kim, J. W., and Hallacy, C., and Ramesh, A., and Goh, G., and Agarwal, S., and Sastry, G., and Askell, A., and Mishkin, P., and Clark, J., 2021. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning (2021), PMLR, pp. 8748\u20138763."},{"key":"e_1_3_2_1_26_1","volume-title":"A., and Chu, C.","author":"Ramesh A.","year":"2022","unstructured":"Ramesh, A., and Dhariwal, P., and Nichol, A., and Chu, C., and Chen, M. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 (2022)."},{"key":"e_1_3_2_1_27_1","first-page":"8831","volume-title":"International Conference on Machine Learning","author":"Ramesh A.","year":"2021","unstructured":"Ramesh, A., and Pavlov, M., and Goh, G., and Gray, S.,and Voss, C., and Radford, A., and Chen, M., and Sutskever, I. 2021. Zero-shot text-to-image generation. In International Conference on Machine Learning (2021), PMLR, pp. 8821\u20138831."},{"key":"e_1_3_2_1_28_1","volume-title":"B.","author":"Reinhard E.","year":"2001","unstructured":"Reinhard, E., and Adhikhmin, M., and Gooch, B., and Shirley, P. 2001. Color transfer between images. IEEE Computer graphics and applications 21, 5 (2001), 34\u201341."},{"key":"e_1_3_2_1_29_1","volume-title":"Gan album art. https:\/\/ganalbum.art\/","author":"Seyp V.","year":"2019","unstructured":"Seyp, V. 2021. Gan album art. https:\/\/ganalbum.art\/, 2019. Accessed: 2021-09-15."},{"key":"e_1_3_2_1_30_1","first-page":"3797","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","author":"Tsai Y.-H.","year":"2017","unstructured":"Tsai, Y.-H., and Shen, X., and Lin, Z., and Sunkavalli, K., and Lu, X., and Yang, M.-H. 2017. Deep image harmonization. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2017), pp. 3789\u20133797."},{"key":"e_1_3_2_1_31_1","first-page":"6932","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"Ulyanov D.","year":"2017","unstructured":"Ulyanov, D., and Vedaldi, A., and Lempitsky, V. 2017. Improved texture networks: Maximizing quality and diversity in feed-forward stylization and texture synthesis. In Proceedings of the IEEE conference on computer vision and pattern recognition (2017), pp. 6924\u20136932."},{"key":"e_1_3_2_1_32_1","volume-title":"N., and Uszkoreit, J., and Jones, L., and Gomez, A. N., and Kaiser, L.","author":"Vaswani A.","year":"2017","unstructured":"Vaswani, A., and Shazeer, N., and Parmar, N., and Uszkoreit, J., and Jones, L., and Gomez, A. N., and Kaiser, L., and Polosukhin, I. 2017. Attention is all you need. arXiv preprint arXiv:1706.03762 (2017). Xu, T., Zhang, P., Huang, Q., Zhang, H., Gan, Z., Huang, X., and"},{"key":"e_1_3_2_1_33_1","first-page":"1324","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"He X.","year":"2018","unstructured":"He, X. 2018. Attngan: Fine-grained text to image generation with attentional generative adversarial networks. In Proceedings of the IEEE conference on computer vision and pattern recognition (2018), pp. 1316\u20131324."},{"key":"e_1_3_2_1_34_1","first-page":"11339","volume-title":"International Conference on Machine Learning","author":"Zhang J.","year":"2020","unstructured":"Zhang, J., and Zhao, Y., and Saleh, M., and Liu, P. 2020. Pegasus: Pre-training with extracted gap-sentences for abstractive summarization. In International Conference on Machine Learning (2020), PMLR, pp. 11328\u201311339."}],"event":{"name":"AIPR 2022: 2022 5th International Conference on Artificial Intelligence and Pattern Recognition","acronym":"AIPR 2022","location":"Xiamen China"},"container-title":["Proceedings of the 2022 5th International Conference on Artificial Intelligence and Pattern Recognition"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3573942.3574088","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3573942.3574088","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:32Z","timestamp":1750186952000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3573942.3574088"}},"subtitle":["This paper presents two methods to generate high resolution uncopyrighted book covers or music album covers."],"short-title":[],"issued":{"date-parts":[[2022,9,23]]},"references-count":34,"alternative-id":["10.1145\/3573942.3574088","10.1145\/3573942"],"URL":"https:\/\/doi.org\/10.1145\/3573942.3574088","relation":{},"subject":[],"published":{"date-parts":[[2022,9,23]]},"assertion":[{"value":"2023-05-16","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}