{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T05:51:58Z","timestamp":1763704318046,"version":"3.45.0"},"reference-count":67,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,26]],"date-time":"2025-10-26T00:00:00Z","timestamp":1761436800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,26]],"date-time":"2025-10-26T00:00:00Z","timestamp":1761436800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,26]]},"DOI":"10.1109\/iccad66269.2025.11240733","type":"proceedings-article","created":{"date-parts":[[2025,11,20]],"date-time":"2025-11-20T18:39:34Z","timestamp":1763663974000},"page":"1-10","source":"Crossref","is-referenced-by-count":0,"title":["Network and Compiler Optimizations for Efficient Linear Algebra Kernels in Private Transformer Inference (Invited Paper)"],"prefix":"10.1109","author":[{"given":"Karthik","family":"Garimella","sequence":"first","affiliation":[{"name":"New York University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Negar","family":"Neda","sequence":"additional","affiliation":[{"name":"New York University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Austin","family":"Ebel","sequence":"additional","affiliation":[{"name":"New York University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nandan Kumar","family":"Jha","sequence":"additional","affiliation":[{"name":"New York University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Brandon","family":"Reagen","sequence":"additional","affiliation":[{"name":"New York University"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"article-title":"Building Meta\u2019s GenAI infrastructure","volume-title":"Engineering at Meta","author":"Lee","key":"ref1"},{"volume-title":"Advancing medical AI with Med-Gemini","author":"Corrado","key":"ref2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/1536414.1536440"},{"key":"ref4","article-title":"Homomorphic encryption for arithmetic of approximate numbers","volume-title":"Cryptology ePrint Archive","author":"Cheon","year":"2016"},{"article-title":"A full RNS variant of approximate homomorphic encryption","year":"2018","author":"Cheon","key":"ref5"},{"key":"ref6","doi-asserted-by":"crossref","DOI":"10.1109\/ISPASS57527.2023.00034","article-title":"Rpu: The ring processing unit","author":"Soni","year":"2023"},{"article-title":"Cheetah: Optimizing and accelerating homomorphic encryption for private inference","year":"2020","author":"Reagen","key":"ref7"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527393"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527415"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/micro56248.2022.00086"},{"article-title":"Osiris: A systolic approach to accelerating fully homomorphic encryption","year":"2024","author":"Ebel","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589053"},{"key":"ref13","first-page":"142","volume-title":"Chet: an optimizing compiler for fully-homomorphic neural-network inferencing","author":"Dathathri","year":"2019"},{"key":"ref14","first-page":"375","volume-title":"Porcupine: a synthesizing compiler for vectorized homomorphic encryption","author":"Cowan","year":"2021"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3656382"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3676641.3716008"},{"article-title":"Attention is all you need","year":"2023","author":"Vaswani","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"article-title":"Does fully homomorphic encryption need compute acceleration?","year":"2021","author":"de Castro","key":"ref19"},{"key":"ref20","doi-asserted-by":"crossref","DOI":"10.46586\/tches.v2021.i4.114-148","article-title":"Over 100x faster bootstrapping in fully homomorphic encryption through memory-centric optimization with GPUs","volume-title":"Cryptology ePrint Archive","author":"Jung","year":"2021"},{"key":"ref21","article-title":"Efficient bootstrapping for approximate homomorphic encryption with non-sparse keys","volume-title":"Cryptology ePrint Archive","author":"Bossuat","year":"2020"},{"key":"ref22","doi-asserted-by":"crossref","DOI":"10.59350\/1v2rj-gta12","article-title":"Encoding schemes in fhe","volume-title":"Math AND Programming","author":"Kun","year":"2023"},{"key":"ref23","doi-asserted-by":"crossref","DOI":"10.1109\/ISPASS61541.2024.00016","article-title":"Ciflow: Dataflow analysis and optimization of key switching for homomorphic encryption","author":"Neda","year":"2024"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/1457838.1457864"},{"article-title":"Language models are unsupervised multitask learners","year":"2019","author":"Radford","key":"ref25"},{"article-title":"Roformer: Enhanced transformer with rotary position embedding","year":"2023","author":"Su","key":"ref26"},{"key":"ref27","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/2023.emnlp-main.298","article-title":"Gqa: Training generalized multi-query transformer models from multi-head checkpoints","author":"Ainslie","year":"2023"},{"article-title":"Deepseek-v2: A strong, economical, and efficient mixture-of-experts language model","year":"2024","author":"Liu","key":"ref28"},{"article-title":"Glu variants improve transformer","year":"2020","author":"Shazeer","key":"ref29"},{"article-title":"Encryption-friendly LLM architecture","volume-title":"The Thirteenth International Conference on Learning Representations (ICLR)","author":"Rho","key":"ref30"},{"article-title":"EncryptedLLM: Privacy-preserving large language model inference via GPU-accelerated fully homomorphic encryption","volume-title":"Forty-second International Conference on Machine Learning (ICML)","author":"de Castro","key":"ref31"},{"year":"2022","key":"ref32","article-title":"Tiktoken"},{"key":"ref33","article-title":"Secure transformer inference made non-interactive","volume-title":"Cryptology ePrint Archive, Paper 2024\/136","author":"Zhang","year":"2024"},{"article-title":"CipherGPT: Secure two-party GPT inference","year":"2023","author":"Hou","key":"ref34"},{"article-title":"He-lrm: Encrypted deep learning recommendation models using fully homomorphic encryption","year":"2025","author":"Garimella","key":"ref35"},{"key":"ref36","first-page":"1651","article-title":"GAZELLE: A low latency framework for secure neural network inference","volume-title":"27th USENIX Security Symposium (USENIX Security 18)","author":"Juvekar"},{"key":"ref37","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-662-44371-2_31","article-title":"Algorithms in HElib","volume-title":"Cryptology ePrint Archive, Paper 2014\/106","author":"Halevi","year":"2014"},{"key":"ref38","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-319-96884-1_4","article-title":"Faster homomorphic linear transformations in HElib","volume-title":"Cryptology ePrint Archive, Paper 2018\/244","author":"Halevi","year":"2018"},{"article-title":"Phi-3 technical report: A highly capable language model locally on your phone","year":"2024","author":"Abdin","key":"ref39"},{"year":"2025","key":"ref40","article-title":"Llama models: Utilities intended for use with llama models"},{"article-title":"AERO: Softmax-only llms for efficient private inference","year":"2024","author":"Jha","key":"ref41"},{"author":"Face","key":"ref42","article-title":"Codeparrot"},{"article-title":"MPC-FORMER: FAST, PERFORMANT AND PRIVATE TRANSFORMER INFERENCE WITH MPC","volume-title":"The Eleventh International Conference on Learning Representations (ICLR)","author":"Li","key":"ref43"},{"article-title":"Entropy-guided attention for private llms","year":"2025","author":"Jha","key":"ref44"},{"article-title":"Bilinear MLPs enable weight-based mechanistic interpretability","volume-title":"The Thirteenth International Conference on Learning Representations (ICLR)","author":"Pearce","key":"ref45"},{"key":"ref46","doi-asserted-by":"crossref","DOI":"10.1145\/3243734.3243837","article-title":"Secure outsourced matrix computation and application to neural networks","volume-title":"Cryptology ePrint Archive, Paper 2018\/1041","author":"Jiang","year":"2018"},{"key":"ref47","article-title":"Secure and efficient outsourced matrix multiplication with homomorphic encryption","volume-title":"Cryptology ePrint Archive, Paper 2024\/1730","author":"Aikata","year":"2024"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/1064978.1065034"},{"key":"ref49","doi-asserted-by":"crossref","DOI":"10.1145\/3560827.3563379","article-title":"OpenFHE: Open-source fully homomorphic encryption library","volume-title":"Cryptology ePrint Archive, Paper 2022\/915","author":"Badawi","year":"2022"},{"year":"2024","key":"ref50","article-title":"Lattigo v6"},{"volume-title":"Microsoft SEAL (release 4.1)","year":"2023","key":"ref51"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/3658644.3690375"},{"article-title":"Fast homomorphic linear algebra with blas","year":"2025","author":"Bae","key":"ref53"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1145\/3385412.3386023"},{"article-title":"Tenseal: A library for encrypted tensor operations using homomorphic encryption","year":"2021","author":"Benaissa","key":"ref55"},{"key":"ref56","doi-asserted-by":"crossref","DOI":"10.1145\/3310273.3323047","article-title":"ngraph-he: A graph compiler for deep learning on homomorphically encrypted data","author":"Boemer","year":"2019"},{"key":"ref57","first-page":"6993","article-title":"DaCapo: Automatic bootstrapping management for efficient fully homomorphic encryption","volume-title":"33rd USENIX Security Symposium (USENIX Security 24)","author":"Cheon"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.56553\/popets-2023-0020"},{"article-title":"Deep residual learning for image recognition","year":"2015","author":"He","key":"ref59"},{"key":"ref60","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2016.91","article-title":"You only look once: Unified, real-time object detection","author":"Redmon","year":"2016"},{"key":"ref61","first-page":"3","volume-title":"Transformer-based language models and homomorphic encryption: An intersection with bert-tiny","author":"Rovida","year":"2024"},{"article-title":"Power-softmax: Towards secure llm inference over encrypted data","year":"2024","author":"Zimerman","key":"ref62"},{"key":"ref63","article-title":"THOR: Secure transformer inference with homomorphic encryption","volume-title":"Cryptology ePrint Archive, Paper 2024\/1881","author":"Moon","year":"2024"},{"key":"ref64","article-title":"Powerformer: Efficient and high-accuracy privacy-preserving language model with homomorphic encryption","volume-title":"Cryptology ePrint Archive, Paper 2024\/1429","author":"Park","year":"2024"},{"article-title":"Encryption-friendly llm architecture","year":"2025","author":"Rho","key":"ref65"},{"article-title":"Lora: Low-rank adaptation of large language models","year":"2021","author":"Hu","key":"ref66"},{"key":"ref67","article-title":"Tricycle: Private transformer inference with tricyclic encodings","volume-title":"Cryptology ePrint Archive, Paper 2025\/1200","author":"Lim","year":"2025"}],"event":{"name":"2025 IEEE\/ACM International Conference On Computer Aided Design (ICCAD)","start":{"date-parts":[[2025,10,26]]},"location":"Munich, Germany","end":{"date-parts":[[2025,10,30]]}},"container-title":["2025 IEEE\/ACM International Conference On Computer Aided Design (ICCAD)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11240608\/11240621\/11240733.pdf?arnumber=11240733","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T05:43:43Z","timestamp":1763703823000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11240733\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,26]]},"references-count":67,"URL":"https:\/\/doi.org\/10.1109\/iccad66269.2025.11240733","relation":{},"subject":[],"published":{"date-parts":[[2025,10,26]]}}}