{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T05:41:13Z","timestamp":1759902073660,"version":"build-2065373602"},"reference-count":77,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T00:00:00Z","timestamp":1756771200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T00:00:00Z","timestamp":1756771200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,9,2]]},"DOI":"10.1109\/cluster59342.2025.11186493","type":"proceedings-article","created":{"date-parts":[[2025,10,7]],"date-time":"2025-10-07T17:35:09Z","timestamp":1759858509000},"page":"1-14","source":"Crossref","is-referenced-by-count":0,"title":["Towards High-Performance and Portable Molecular Docking on CPUs Through Vectorization"],"prefix":"10.1109","author":[{"given":"Gianmarco","family":"Accordi","sequence":"first","affiliation":[{"name":"Politecnico di Milano,Dipartimento di Elettronica, Informazione e Bioingegneria,Milan,Italy"}]},{"given":"Jens","family":"Domke","sequence":"additional","affiliation":[{"name":"RIKEN Center for Computational Science,Kobe,Japan"}]},{"given":"Theresa","family":"Pollinger","sequence":"additional","affiliation":[{"name":"RIKEN Center for Computational Science,Kobe,Japan"}]},{"given":"Davide","family":"Gadioli","sequence":"additional","affiliation":[{"name":"Politecnico di Milano,Dipartimento di Elettronica, Informazione e Bioingegneria,Milan,Italy"}]},{"given":"Gianluca","family":"Palermo","sequence":"additional","affiliation":[{"name":"Politecnico di Milano,Dipartimento di Elettronica, Informazione e Bioingegneria,Milan,Italy"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/tetc.2022.3187134"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.0c01010"},{"journal-title":"TOP500 Project","article-title":"TOP500: The List","year":"2025","key":"ref3"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.23919\/VLSICircuits52068.2021.9492415"},{"journal-title":"LUMI Supercomputer","article-title":"Lumi hardware overview","year":"2023","key":"ref5"},{"journal-title":"European Centre for Medium-Range Weather Forecasts","article-title":"Supercomputer facility","year":"2022","key":"ref6"},{"volume-title":"General-purpose multicore architectures","year":"2025","author":"Ghose","key":"ref7"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ipdps.2019.00019"},{"key":"ref9","first-page":"19","volume-title":"The Drug Repurposing Strategy in the Exscalate4CoV Project: Raloxifene Clinical Trials","author":"Beccari","year":"2023"},{"issue":"14","key":"ref10","doi-asserted-by":"crossref","first-page":"1639","DOI":"10.1002\/(SICI)1096-987X(19981115)19:14<1639::AID-JCC10>3.0.CO;2-B","article-title":"Automated docking using a lamarckian genetic algorithm and an empirical binding free energy function","volume":"19","author":"Morris","year":"1998","journal-title":"Journal of Computational Chemistry"},{"journal-title":"Google","article-title":"Google highway","year":"2025","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1002\/jcc.20291"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1090\/noti1095"},{"journal-title":"mudock","year":"2025","author":"Polimi","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.healthpol.2010.12.002"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1111\/j.1476-5381.2010.01127.x"},{"key":"ref17","doi-asserted-by":"crossref","DOI":"10.1101\/2024.07.08.602536","volume-title":"The impact of library size and scale of testing on virtual screening","author":"Liu","year":"2024"},{"key":"ref18","first-page":"255","volume-title":"Docking and Virtual Screening in Drug Discovery","author":"Kontoyianni","year":"2017"},{"issue":"7873","key":"ref19","doi-asserted-by":"crossref","first-page":"583","DOI":"10.1038\/s41586-021-03819-2","article-title":"Highly accurate protein structure prediction with alphafold","volume":"596","author":"Jumper","year":"2021","journal-title":"Nature"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.2174\/157340911795677602"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/p3hpc51967.2020.00009"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1002\/jcc.21256"},{"issue":"9","key":"ref23","doi-asserted-by":"crossref","first-page":"803","DOI":"10.1002\/cpe.728","article-title":"The linpack benchmark: past, present and future","volume":"15","author":"Dongarra","year":"2003","journal-title":"Concurrency and Computation: Practice and Experience"},{"journal-title":"Phoronix Media","article-title":"Openbenchmarking.org: Cross-platform, open-source automated benchmarking platform","year":"2025","key":"ref24"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3624062.3624135"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3185768.3185771"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"313","DOI":"10.1016\/j.future.2024.03.050","article-title":"Genarchbench: A genomics benchmark suite for arm hpc processors","volume":"157","author":"L\u00f3pez-Villellas","year":"2024","journal-title":"Future Generation Computer Systems"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3695794.3695813"},{"volume-title":"Microarchitectural comparison and in-core modeling of state-of-the-art cpus: Grace, sapphire rapids, and genoa","year":"2024","author":"Laukemann","key":"ref29"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3636480.3637097"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3581576.3581621"},{"key":"ref32","doi-asserted-by":"crossref","DOI":"10.1007\/s42514-024-00212-z","article-title":"An empirical performance evaluation of sycl on arm multi-core processors","author":"Liang","year":"2025","journal-title":"CCF Transactions on High Performance Computing"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-49943-3_7"},{"key":"ref34","article-title":"An evaluation of the fujitsu a64fx for hpc applications","volume-title":"Cray User Group 2021, May 2021, cray User Group 2021","author":"Poenaru","year":"2021"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3636480.3637095"},{"key":"ref36","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.cpc.2018.10.028","article-title":"Simd vectorization for the lennard-jones potential with avx2 and avx-512 instructions","volume":"237","author":"Watanabe","year":"2019","journal-title":"Computer Physics Communications"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-78713-4_18"},{"key":"ref38","first-page":"1","article-title":"Introduction to intel advanced vector extensions","volume":"23","author":"Lomont","year":"2011","journal-title":"Intel white paper"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/mm.2017.35"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/cluster48925.2021.00106"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1155\/2021\/3264624"},{"article-title":"Working draft, standard for programming language c++","volume-title":"I. JTC1\/SC22\/WG21","year":"2019","key":"ref42"},{"journal-title":"xtensor-stack developers","article-title":"xsimd: C++ wrappers for simd intrinsics","year":"2016","key":"ref43"},{"key":"ref44","doi-asserted-by":"crossref","first-page":"947","DOI":"10.1016\/j.future.2017.08.007","article-title":"Implications of a metric for performance portability","volume":"92","author":"Pennycook","year":"2019","journal-title":"Future Generation Computer Systems"},{"article-title":"O. A. R. Board","volume-title":"Open MP Application Programming Interface Specification 5.2","year":"2021","key":"ref45"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1002\/jcc.21334"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jctc.0c01006"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.2c01504"},{"key":"ref49","first-page":"1","article-title":"A case study in using opencl on fpgas: Creating an open-source accelerator of the autodock molecular docking software","volume-title":"in FSP Workshop 2018; Fifth International Workshop on FPGAs for Software Programmers","author":"Solis-Vasquez","year":"2018"},{"key":"ref50","doi-asserted-by":"crossref","first-page":"107863","DOI":"10.1016\/j.future.2025.107863","article-title":"Harnessing quality-throughput trade-off in scoring functions for extreme-scale virtual screening campaigns","volume":"172","author":"Zhang","year":"2025","journal-title":"Future Generation Computer Systems"},{"issue":"6+","key":"ref51","first-page":"1325","article-title":"Fastgrid - the accelerated autogrid potential maps generation for molecular docking","volume":"29","author":"Ol\u0161\u00e1k","year":"2012","journal-title":"Computing and Informatics"},{"issue":"7","key":"ref52","doi-asserted-by":"crossref","first-page":"1384","DOI":"10.1016\/j.ces.2008.12.006","article-title":"Efficient implementation of detailed surface chemistry into reactor models using mapped rate data","volume":"64","author":"Votsmeier","year":"2009","journal-title":"Chemical Engineering Science"},{"key":"ref53","doi-asserted-by":"crossref","first-page":"108171","DOI":"10.1016\/j.cpc.2021.108171","article-title":"Lammps - a flexible simulation tool for particle-based materials modeling at the atomic, meso, and continuum scales","volume":"271","author":"Thompson","year":"2022","journal-title":"Computer Physics Communications"},{"article-title":"Weather research and forecasting (wrf) model","volume-title":"WRF Community","year":"2000","key":"ref54"},{"journal-title":"AWS","article-title":"Aws calculator","year":"2025","key":"ref55"},{"journal-title":"Intel Corporation","article-title":"Intel\u00ae Xeon \u00ae Scalable Processors (Formerly Sapphire Rapids)","year":"2025","key":"ref56"},{"journal-title":"Amd epyc genoa zen 4 cpu lineup specs and benchmarks leaked","year":"2022","author":"Mujtaba","key":"ref57"},{"journal-title":"Arm\u2019s neoverse v2, in aws\u2019s graviton 4","year":"2024","author":"Lam","key":"ref58"},{"journal-title":"Riken","article-title":"Fugaku usage fee","year":"2019","key":"ref59"},{"journal-title":"Fujitsu","article-title":"A64fx","year":"2025","key":"ref60"},{"journal-title":"CSCS","article-title":"Cscs nvidia grace usage cost","year":"2024","key":"ref61"},{"journal-title":"NVIDIA Grace Performance Tuning Guide","article-title":"NVIDIA Corporation","year":"2025","key":"ref62"},{"journal-title":"Chips and Cheese","article-title":"Golden cove\u2019s vector register file: Checking with official spr data","year":"2024","key":"ref63"},{"journal-title":"Popping the hood on golden cove","year":"2021","author":"Lam","key":"ref64"},{"journal-title":"Amd\u2019s zen 4 part 1: Frontend and execution engine","year":"2022","author":"Frontend","key":"ref65"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/icppw.2010.38"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-66146-4_17"},{"journal-title":"NVIDIA","article-title":"Nvidia grace power and thermals","year":"2023","key":"ref68"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1080\/17460441.2023.2221025"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1021\/jm048957q"},{"key":"ref71","article-title":"gitHub issue \\#102047","author":"Project","year":"2023","journal-title":"[avx512] prefering 512-bit vectors on recent intel cpus"},{"volume-title":"Performance of sse and avx instruction sets","year":"2012","author":"Jeong","key":"ref72"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/cluster49012.2020.00075"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1145\/3636480.3637093"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1145\/3695794.3695813"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/p3hpc54578.2021.00004"},{"key":"ref77","first-page":"42","article-title":"A portable drug discovery platform for urgent computing","volume-title":"Procedia Computer Science","volume":"240","author":"Gadioli","year":"2024"}],"event":{"name":"2025 IEEE International Conference on Cluster Computing (CLUSTER)","start":{"date-parts":[[2025,9,2]]},"location":"United Kingdom","end":{"date-parts":[[2025,9,5]]}},"container-title":["2025 IEEE International Conference on Cluster Computing (CLUSTER)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11186399\/11186452\/11186493.pdf?arnumber=11186493","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T05:02:11Z","timestamp":1759899731000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11186493\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,2]]},"references-count":77,"URL":"https:\/\/doi.org\/10.1109\/cluster59342.2025.11186493","relation":{},"subject":[],"published":{"date-parts":[[2025,9,2]]}}}