{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T06:46:13Z","timestamp":1777013173341,"version":"3.51.4"},"reference-count":59,"publisher":"IOP Publishing","issue":"3","license":[{"start":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T00:00:00Z","timestamp":1726444800000},"content-version":"vor","delay-in-days":15,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"},{"start":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T00:00:00Z","timestamp":1726444800000},"content-version":"tdm","delay-in-days":15,"URL":"https:\/\/iopscience.iop.org\/info\/page\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100006208","name":"High Energy Physics","doi-asserted-by":"crossref","award":["DE-FOA-0002705"],"award-info":[{"award-number":["DE-FOA-0002705"]}],"id":[{"id":"10.13039\/100006208","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Willum Fonden","award":["0002544"],"award-info":[{"award-number":["0002544"]}]},{"DOI":"10.13039\/100011664","name":"SLAC National Accelerator Laboratory","doi-asserted-by":"crossref","award":["DE-AC02-76SF0051"],"award-info":[{"award-number":["DE-AC02-76SF0051"]}],"id":[{"id":"10.13039\/100011664","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["iopscience.iop.org"],"crossmark-restriction":false},"short-container-title":["Mach. Learn.: Sci. Technol."],"published-print":{"date-parts":[[2024,9,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:p>\n                    We pursue the use of deep learning methods to improve state-of-the-art computations in theoretical high-energy physics. Planar\n                    <jats:inline-formula>\n                      <jats:tex-math>\n                        \n                      <\/jats:tex-math>\n                      <mml:math xmlns:mml=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" overflow=\"scroll\">\n                        <mml:mrow>\n                          <mml:mrow>\n                            <mml:mi class=\"MJX-tex-calligraphic\">N<\/mml:mi>\n                          <\/mml:mrow>\n                          <mml:mo>=<\/mml:mo>\n                          <mml:mn>4<\/mml:mn>\n                        <\/mml:mrow>\n                      <\/mml:math>\n                    <\/jats:inline-formula>\n                    Super Yang\u2013Mills theory is a close cousin to the theory that describes Higgs boson production at the Large Hadron Collider; its scattering amplitudes are large mathematical expressions containing integer coefficients. In this paper, we apply transformers to predict these coefficients. The problem can be formulated in a language-like representation amenable to standard cross-entropy training objectives. We design two related experiments and show that the model achieves high accuracy (\n                    <jats:inline-formula>\n                      <jats:tex-math>\n                        \n                      <\/jats:tex-math>\n                      <mml:math xmlns:mml=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" overflow=\"scroll\">\n                        <mml:mrow>\n                          <mml:mrow>\n                            <mml:mo>&gt;<\/mml:mo>\n                          <\/mml:mrow>\n                          <mml:mrow>\n                            <mml:mn>98<\/mml:mn>\n                            <mml:mi mathvariant=\"normal\">%<\/mml:mi>\n                          <\/mml:mrow>\n                          <mml:mo stretchy=\"false\">)<\/mml:mo>\n                        <\/mml:mrow>\n                      <\/mml:math>\n                    <\/jats:inline-formula>\n                    on both tasks. Our work shows that transformers can be applied successfully to problems in theoretical physics that require exact solutions.\n                  <\/jats:p>","DOI":"10.1088\/2632-2153\/ad743e","type":"journal-article","created":{"date-parts":[[2024,8,27]],"date-time":"2024-08-27T19:00:46Z","timestamp":1724785246000},"page":"035073","update-policy":"https:\/\/doi.org\/10.1088\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["Transforming the bootstrap: using transformers to compute scattering amplitudes in planar\n                    <mml:math xmlns:mml=\"http:\/\/www.w3.org\/1998\/Math\/MathML\">\n                      <mml:mrow>\n                        <mml:mrow>\n                          <mml:mi>N<\/mml:mi>\n                        <\/mml:mrow>\n                        <mml:mo>=<\/mml:mo>\n                        <mml:mn>4<\/mml:mn>\n                      <\/mml:mrow>\n                    <\/mml:math>\n                    super Yang\u2013Mills theory"],"prefix":"10.1088","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3235-9486","authenticated-orcid":true,"given":"Tianji","family":"Cai","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4737-3931","authenticated-orcid":true,"given":"Garrett W","family":"Merz","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5912-3342","authenticated-orcid":false,"given":"Fran\u00e7ois","family":"Charton","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2536-4209","authenticated-orcid":false,"given":"Niklas","family":"Nolte","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0032-0181","authenticated-orcid":false,"given":"Matthias","family":"Wilhelm","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5769-7094","authenticated-orcid":true,"given":"Kyle","family":"Cranmer","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4985-7518","authenticated-orcid":false,"given":"Lance J","family":"Dixon","sequence":"additional","affiliation":[]}],"member":"266","published-online":{"date-parts":[[2024,9,16]]},"reference":[{"key":"mlstad743ebib1","doi-asserted-by":"publisher","DOI":"10.1088\/1361-6471\/acbaec","article-title":"Les Houches 2021-physics at TeV colliders: report on the standard model precision wishlist","volume":"50","author":"Huss","year":"2023","journal-title":"J. Phys. G: Nucl. Part. Phys."},{"key":"mlstad743ebib2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.physrep.2021.03.006","article-title":"Collider physics at the precision frontier","volume":"922","author":"Heinrich","year":"2021","journal-title":"Phys. Rep."},{"key":"mlstad743ebib3","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.114.212001","article-title":"Higgs Boson Gluon-Fusion production in QCD at three loops","volume":"114","author":"Anastasiou","year":"2015","journal-title":"Phys. Rev. Lett."},{"key":"mlstad743ebib4","doi-asserted-by":"publisher","first-page":"JHEP05(2016)058","DOI":"10.1007\/JHEP05(2016)058","article-title":"High precision determination of the gluon fusion Higgs boson cross-section at the LHC","author":"Anastasiou","year":"2016","journal-title":"J. High Energy Phys."},{"key":"mlstad743ebib5","doi-asserted-by":"publisher","first-page":"JHEP05(2018)028","DOI":"10.1007\/JHEP05(2018)028","article-title":"Higgs boson production at hadron colliders at N3LO in QCD","author":"Mistlberger","year":"2018","journal-title":"J. High Energy Phys."},{"key":"mlstad743ebib6","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.125.051804","article-title":"Higgs Boson production in bottom-quark fusion to third order in the strong coupling","volume":"125","author":"Duhr","year":"2020","journal-title":"Phys. Rev. Lett."},{"key":"mlstad743ebib7","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.117.072001","article-title":"Vector-Boson fusion Higgs production at three loops in QCD","volume":"117","author":"Dreyer","year":"2016","journal-title":"Phys. Rev. Lett."},{"key":"mlstad743ebib8","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.125.172001","article-title":"Drell-Yan cross section to third order in the strong coupling constant","volume":"125","author":"Duhr","year":"2020","journal-title":"Phys. Rev. Lett."},{"key":"mlstad743ebib9","doi-asserted-by":"publisher","first-page":"JHEP11(2020)143","DOI":"10.1007\/JHEP11(2020)143","article-title":"Charged current Drell-Yan production at N3LO","author":"Duhr","year":"2020","journal-title":"J. High Energy Phys."},{"key":"mlstad743ebib10","doi-asserted-by":"publisher","first-page":"JHEP03(2022)116","DOI":"10.1007\/JHEP03(2022)116","article-title":"Lepton-pair production at hadron colliders at N3LO in QCD","author":"Duhr","year":"2022","journal-title":"J. High Energy Phys."},{"key":"mlstad743ebib11","doi-asserted-by":"publisher","first-page":"JHEP11(2011)023","DOI":"10.1007\/JHEP11(2011)023","article-title":"Bootstrapping the three-loop hexagon","author":"Dixon","year":"2011","journal-title":"J. High Energy Phys."},{"key":"mlstad743ebib12","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.117.241601","article-title":"Bootstrapping a five-loop amplitude using Steinmann relations","volume":"117","author":"Caron-Huot","year":"2016","journal-title":"Phys. Rev. Lett."},{"key":"mlstad743ebib13","doi-asserted-by":"publisher","first-page":"JHEP08(2019)016","DOI":"10.1007\/JHEP08(2019)016","article-title":"Six-Gluon amplitudes in planar N = 4 super-Yang-Mills theory at six and seven loops","author":"Caron-Huot","year":"2019","journal-title":"J. High Energy Phys."},{"key":"mlstad743ebib14","doi-asserted-by":"publisher","first-page":"JHEP07(2022)153","DOI":"10.1007\/JHEP07(2022)153","article-title":"Bootstrapping a stress-tensor form factor through eight loops","author":"Dixon","year":"2022","journal-title":"J. High Energy Phys."},{"key":"mlstad743ebib15","doi-asserted-by":"publisher","first-page":"JHEP02(2012)056","DOI":"10.1007\/JHEP02(2012)056","article-title":"Two-loop QCD corrections to the helicity amplitudes for H\u2192 3 partons","author":"Gehrmann","year":"2012","journal-title":"J. High Energy Phys."},{"key":"mlstad743ebib16","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.111.091602","article-title":"Spacetime and flux tube S-Matrices at finite coupling for N=4 supersymmetric Yang-Mills theory","volume":"111","author":"Basso","year":"2013","journal-title":"Phys. Rev. Lett."},{"key":"mlstad743ebib17","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.126.031602","article-title":"Operator product expansion for form factors","volume":"126","author":"Sever","year":"2021","journal-title":"Phys. Rev. Lett."},{"key":"mlstad743ebib18","article-title":"Attention is all you need","author":"Vaswani","year":"2017"},{"key":"mlstad743ebib19","article-title":"Neural machine translation by jointly learning to align and translate","author":"Bahdanau","year":"2015"},{"key":"mlstad743ebib20","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019"},{"key":"mlstad743ebib21","article-title":"An image is worth 16 \u00d7 16 words: transformers for image recognition at scale","author":"Dosovitskiy","year":"2021"},{"key":"mlstad743ebib22","doi-asserted-by":"publisher","first-page":"468","DOI":"10.1038\/s41586-023-06924-6","article-title":"Mathematical discoveries from program search with large language models","volume":"625","author":"Romera-Paredes","year":"2024","journal-title":"Nature"},{"key":"mlstad743ebib23","article-title":"Linear algebra with transformers","volume":"2022","author":"Charton","year":"2022","journal-title":"Trans. Mach. Learn. Res."},{"key":"mlstad743ebib24","doi-asserted-by":"publisher","first-page":"003","DOI":"10.22323\/1.376.0003","article-title":"The Steinmann cluster bootstrap for N = 4 super Yang-Mills amplitudes","volume":"CORFU2019","author":"Caron-Huot","year":"2020","journal-title":"PoS"},{"key":"mlstad743ebib25","doi-asserted-by":"publisher","DOI":"10.1088\/2632-2153\/acb2b2","article-title":"SYMBA: symbolic computation of squared amplitudes in high energy physics with machine learning","volume":"4","author":"Alnuqaydan","year":"2023","journal-title":"Mach. Learn. Sci. Technol."},{"key":"mlstad743ebib26","doi-asserted-by":"publisher","first-page":"JHEP06(2019)003","DOI":"10.1007\/JHEP06(2019)003","article-title":"Branes with brains: exploring string vacua with deep reinforcement learning","author":"Halverson","year":"2019","journal-title":"J. High Energy Phys."},{"key":"mlstad743ebib27","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1142\/S2810939223500028","article-title":"Simplifying polylogarithms with machine learning","volume":"1","author":"Dersy","year":"2024","journal-title":"Int. J. Data Sci. Math. Sci."},{"key":"mlstad743ebib28","article-title":"Deep learning for symbolic mathematics","author":"Lample","year":"2020"},{"key":"mlstad743ebib29","article-title":"Deep symbolic regression for recurrent sequences","author":"d\u2019Ascoli","year":"2022"},{"key":"mlstad743ebib30","article-title":"Learning the greatest common divisor: explaining transformer predictions","author":"Charton","year":"2024"},{"key":"mlstad743ebib31","first-page":"1391","article-title":"The algebraic combinatorial approach for low-rank matrix completion","volume":"16","author":"Kir\u00e1ly","year":"2015","journal-title":"J. Mach. Learn. Res."},{"key":"mlstad743ebib32","article-title":"Teaching arithmetic to small transformers","author":"Lee","year":"2024"},{"key":"mlstad743ebib33","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1016\/0550-3213(77)90328-5","article-title":"Supersymmetric Yang-Mills theories","volume":"121","author":"Brink","year":"1977","journal-title":"Nucl. Phys. B"},{"key":"mlstad743ebib34","doi-asserted-by":"publisher","first-page":"JHEP04(2021)147","DOI":"10.1007\/JHEP04(2021)147","article-title":"A three-point form factor through five loops","author":"Dixon","year":"2021","journal-title":"J. High Energy Phys."},{"key":"mlstad743ebib35","doi-asserted-by":"publisher","first-page":"JHEP05(2012)082","DOI":"10.1007\/JHEP05(2012)082","article-title":"Analytic two-loop form factors in N=4 SYM","author":"Brandhuber","year":"2012","journal-title":"J. High Energy Phys."},{"key":"mlstad743ebib36","doi-asserted-by":"publisher","first-page":"JHEP08(2012)043","DOI":"10.1007\/JHEP08(2012)043","article-title":"Hopf algebras, coproducts and symbols: an application to Higgs boson amplitudes","author":"Duhr","year":"2012","journal-title":"J. High Energy Phys."},{"key":"mlstad743ebib37","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.105.151605","article-title":"Classical polylogarithms for amplitudes and Wilson loops","volume":"105","author":"Goncharov","year":"2010","journal-title":"Phys. Rev. Lett."},{"key":"mlstad743ebib38","article-title":"xval: a continuous number encoding for large language models","author":"Golkar","year":"2023"},{"key":"mlstad743ebib39","article-title":"Investigating the limitations of the transformers with simple arithmetic tasks","author":"Nogueira","year":"2021"},{"key":"mlstad743ebib40","article-title":"Salsa: attacking lattice cryptography with transformers","author":"Wenger","year":"2022"},{"key":"mlstad743ebib41","article-title":"Language models are few-shot learners","author":"Brown","year":"2020"},{"key":"mlstad743ebib42","article-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"mlstad743ebib43","article-title":"Adam: a method for stochastic optimization","author":"Kingma","year":"2015"},{"key":"mlstad743ebib44","article-title":"Pytorch: an imperative style, high-performance deep learning library","author":"Paszke","year":"2019"},{"key":"mlstad743ebib45","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","article-title":"Long short-term memory","volume":"9","author":"Hochreiter","year":"1997","journal-title":"Neural Comput."},{"key":"mlstad743ebib46","doi-asserted-by":"crossref","DOI":"10.3115\/v1\/D14-1179","article-title":"Learning phrase representations using rnn encoder-decoder for statistical machine translation","author":"Cho","year":"2014"},{"key":"mlstad743ebib47","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.11218272","article-title":"Symbol representation of the three gluon form factor in N=4 planar super Yang-Mills theory","author":"Dixon","year":"2024","journal-title":"Zenodo"},{"key":"mlstad743ebib48","article-title":"Transformers for scattering amplitudes","author":"Merz","year":"2023"},{"key":"mlstad743ebib49","doi-asserted-by":"publisher","first-page":"497","DOI":"10.4310\/MRL.1998.v5.n4.a7","article-title":"Multiple polylogarithms, cyclotomy and modular complexes","volume":"5","author":"Goncharov","year":"1998","journal-title":"Math. Res. Lett."},{"key":"mlstad743ebib50","article-title":"Multiple polylogarithms and mixed Tate motives","author":"Goncharov","year":"2001"},{"key":"mlstad743ebib51","article-title":"Multiple elliptic polylogarithms","author":"Brown","year":"2011"},{"key":"mlstad743ebib52","article-title":"Functions beyond multiple polylogarithms for precision collider physics","author":"Bourjaily","year":"2022"},{"key":"mlstad743ebib53","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/N18-2074","article-title":"Self-attention with relative position representations","author":"Shaw","year":"2018"},{"key":"mlstad743ebib54","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/P19-1285","article-title":"Transformer-xl: attentive language models beyond a fixed-length context","author":"Dai","year":"2019"},{"key":"mlstad743ebib55","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2020-3015","article-title":"Conformer: convolution-augmented transformer for speech recognition","author":"Gulati","year":"2020"},{"key":"mlstad743ebib56","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.127063","article-title":"Roformer: enhanced transformer with rotary position embedding","volume":"568","author":"Su","year":"2024","journal-title":"Neurocomputing"},{"key":"mlstad743ebib57","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/2023.acl-long.816","article-title":"A length-extrapolatable transformer","author":"Sun","year":"2023"},{"key":"mlstad743ebib58","article-title":"Rotary-embedding-torch","author":"Wang","year":"2023"},{"key":"mlstad743ebib59","article-title":"The impact of positional encoding on length generalization in transformers","author":"Kazemnejad","year":"2023"}],"container-title":["Machine Learning: Science and Technology"],"original-title":[],"link":[{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ad743e","content-type":"text\/html","content-version":"am","intended-application":"text-mining"},{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ad743e\/pdf","content-type":"application\/pdf","content-version":"am","intended-application":"text-mining"},{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ad743e","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ad743e\/pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ad743e\/pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ad743e\/pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ad743e\/pdf","content-type":"application\/pdf","content-version":"am","intended-application":"similarity-checking"},{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ad743e\/pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T08:30:40Z","timestamp":1726475440000},"score":1,"resource":{"primary":{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ad743e"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,1]]},"references-count":59,"journal-issue":{"issue":"3","published-online":{"date-parts":[[2024,9,16]]},"published-print":{"date-parts":[[2024,9,1]]}},"URL":"https:\/\/doi.org\/10.1088\/2632-2153\/ad743e","relation":{"has-review":[{"id-type":"doi","id":"10.1088\/2632-2153\/AD743E\/v2\/response1","asserted-by":"object"},{"id-type":"doi","id":"10.1088\/2632-2153\/AD743E\/v2\/decision1","asserted-by":"object"},{"id-type":"doi","id":"10.1088\/2632-2153\/AD743E\/v1\/review1","asserted-by":"object"},{"id-type":"doi","id":"10.1088\/2632-2153\/AD743E\/v1\/review2","asserted-by":"object"},{"id-type":"doi","id":"10.1088\/2632-2153\/AD743E\/v1\/decision1","asserted-by":"object"},{"id-type":"doi","id":"10.1088\/2632-2153\/AD743E\/v2\/review1","asserted-by":"object"}]},"ISSN":["2632-2153"],"issn-type":[{"value":"2632-2153","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,1]]},"assertion":[{"value":"Transforming the bootstrap: using transformers to compute scattering amplitudes in planar\n                      \n                        \n                          \n                            N\n                          \n                          =\n                          4\n                        \n                      \n                      super Yang\u2013Mills theory","name":"article_title","label":"Article Title"},{"value":"Machine Learning: Science and Technology","name":"journal_title","label":"Journal Title"},{"value":"paper","name":"article_type","label":"Article Type"},{"value":"\u00a9 2024 The Author(s). Published by IOP Publishing Ltd","name":"copyright_information","label":"Copyright Information"},{"value":"2024-05-23","name":"date_received","label":"Date Received","group":{"name":"publication_dates","label":"Publication dates"}},{"value":"2024-08-27","name":"date_accepted","label":"Date Accepted","group":{"name":"publication_dates","label":"Publication dates"}},{"value":"2024-09-16","name":"date_epub","label":"Online publication date","group":{"name":"publication_dates","label":"Publication dates"}}]}}