{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T04:00:27Z","timestamp":1772769627060,"version":"3.50.1"},"reference-count":65,"publisher":"IOP Publishing","issue":"3","license":[{"start":{"date-parts":[[2025,7,7]],"date-time":"2025-07-07T00:00:00Z","timestamp":1751846400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"},{"start":{"date-parts":[[2025,7,7]],"date-time":"2025-07-07T00:00:00Z","timestamp":1751846400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/iopscience.iop.org\/info\/page\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100017223","name":"National Energy Research Scientific Computing Center","doi-asserted-by":"crossref","award":["DE-AC02-05CH11231"],"award-info":[{"award-number":["DE-AC02-05CH11231"]}],"id":[{"id":"10.13039\/100017223","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100007210","name":"RWTH Aachen University","doi-asserted-by":"crossref","award":["rwth0934"],"award-info":[{"award-number":["rwth0934"]}],"id":[{"id":"10.13039\/501100007210","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Fermi Research Alliance, LLC","award":["DE-AC02-07CH11359"],"award-info":[{"award-number":["DE-AC02-07CH11359"]}]},{"DOI":"10.13039\/100000015","name":"US Department of Energy","doi-asserted-by":"crossref","award":["DOE- SC0010008"],"award-info":[{"award-number":["DOE- SC0010008"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"crossref","award":["396021762 \u2013 TRR 257"],"award-info":[{"award-number":["396021762 \u2013 TRR 257"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100005969","name":"Universit\u00e0 di 
Bologna","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100005969","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/100007739","name":"Aspen Center for Physics","doi-asserted-by":"crossref","award":["PHY-2210452"],"award-info":[{"award-number":["PHY-2210452"]}],"id":[{"id":"10.13039\/100007739","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["iopscience.iop.org"],"crossmark-restriction":false},"short-container-title":["Mach. Learn.: Sci. Technol."],"published-print":{"date-parts":[[2025,9,30]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:p>\n                    Foundation models are deep learning models pre-trained on large amounts of data which are capable of generalizing to multiple datasets and\/or downstream tasks. This work demonstrates how data collected by the CMS experiment at the Large Hadron Collider can be useful in pre-training foundation models for HEP. Specifically, we introduce the\n                    <jats:sc>AspenOpenJets<\/jats:sc>\n                    (AOJs) dataset, consisting of approximately 178\u2009M high\n                    <jats:inline-formula>\n                      <jats:tex-math>\n                        \n                      <\/jats:tex-math>\n                      <mml:math xmlns:mml=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" overflow=\"scroll\">\n                        <mml:mrow>\n                          <mml:msub>\n                            <mml:mi>p<\/mml:mi>\n                            <mml:mrow>\n                              <mml:mi mathvariant=\"normal\">T<\/mml:mi>\n                            <\/mml:mrow>\n                          <\/mml:msub>\n                        <\/mml:mrow>\n                      <\/mml:math>\n                    <\/jats:inline-formula>\n                    jets derived from CMS 2016 Open Data. 
We show how pre-training the\n                    <jats:sc>OmniJet<\/jats:sc>\n                    -\n                    <jats:italic>\u03b1<\/jats:italic>\n                    foundation model on AOJs improves performance on generative tasks with significant domain shift: generating boosted top and QCD jets from the simulated JetClass dataset. In addition to demonstrating the power of pre-training of a jet-based foundation model on actual proton\u2013proton collision data, we provide the ML-ready derived AOJs dataset for further public use.\n                  <\/jats:p>","DOI":"10.1088\/2632-2153\/ade58f","type":"journal-article","created":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:54:57Z","timestamp":1750186497000},"page":"030601","update-policy":"https:\/\/doi.org\/10.1088\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Aspen Open Jets: unlocking LHC data for foundation models in particle physics"],"prefix":"10.1088","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3765-3123","authenticated-orcid":false,"given":"Oz","family":"Amram","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0399-8836","authenticated-orcid":true,"given":"Luca","family":"Anzalone","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1931-0127","authenticated-orcid":false,"given":"Joschka","family":"Birk","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4027-5477","authenticated-orcid":false,"given":"Darius 
A","family":"Faroughy","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1551-814X","authenticated-orcid":true,"given":"Anna","family":"Hallin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3457-2755","authenticated-orcid":false,"given":"Gregor","family":"Kasieczka","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3089-6827","authenticated-orcid":false,"given":"Michael","family":"Kr\u00e4mer","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8225-7269","authenticated-orcid":true,"given":"Ian","family":"Pang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3283-5208","authenticated-orcid":true,"given":"Humberto","family":"Reyes-Gonzalez","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3408-3871","authenticated-orcid":false,"given":"David","family":"Shih","sequence":"additional","affiliation":[]}],"member":"266","published-online":{"date-parts":[[2025,7,7]]},"reference":[{"key":"mlstade58fbib1","author":"Bommasani","year":"2021"},{"key":"mlstade58fbib2","first-page":"pp 1877","volume":"vol 33","author":"Brown","year":"2020"},{"key":"mlstade58fbib3","author":"Devlin","year":"2019"},{"key":"mlstade58fbib4","author":"Touvron","year":"2023"},{"key":"mlstade58fbib5","author":"Saharia","year":"2022"},{"key":"mlstade58fbib6","author":"Radford","year":"2021"},{"key":"mlstade58fbib7","author":"Jia","year":"2021"},{"key":"mlstade58fbib8","author":"Kishimoto","year":"2023"},{"key":"mlstade58fbib9","author":"Qu","year":"2022"},{"key":"mlstade58fbib10","author":"Heinrich","year":"2024"},{"key":"mlstade58fbib11","doi-asserted-by":"publisher","DOI":"10.1088\/2632-2153\/ad66ad","volume":"5","author":"Birk","year":"2024","journal-title":"Mach. Learn. Sci. 
Tech."},{"key":"mlstade58fbib12","author":"Harris","year":"2024"},{"key":"mlstade58fbib13","author":"Mikuni","year":"2024"},{"key":"mlstade58fbib14","doi-asserted-by":"publisher","DOI":"10.7483\/OPENDATA.CMS.WING.7QKV","article-title":"Multijet primary dataset in AOD format from RunA of 2011 (\/MultiJet\/Run2012A-22Jan2013-v1\/AOD)","author":"CMS Collaboration","year":"2022"},{"key":"mlstade58fbib15","doi-asserted-by":"publisher","DOI":"10.7483\/OPENDATA.CMS.WING.7QKV","article-title":"Multijet primary dataset in AOD format from RunA of 2012 (\/MultiJet\/Run2012A-22Jan2013-v1\/AOD)","author":"CMS Collaboration","year":"2022"},{"key":"mlstade58fbib16","doi-asserted-by":"publisher","DOI":"10.7483\/OPENDATA.CMS.1KTG.X0W4","article-title":"JetHT primary dataset in MINIAOD format from RunG of 2016 (\/JetHT\/Run2016G-UL2016_MiniAODv2-v2\/MINIAOD)","author":"CMS Collaboration","year":"2024"},{"key":"mlstade58fbib17","doi-asserted-by":"publisher","DOI":"10.7483\/OPENDATA.CMS.LT9E.T7RQ","article-title":"JetHT primary dataset in MINIAOD format from RunH of 2016 (\/JetHT\/Run2016H-UL2016_MiniAODv2-v2\/MINIAOD)","author":"CMS Collaboration","year":"2024"},{"key":"mlstade58fbib18","doi-asserted-by":"publisher","DOI":"10.7483\/OPENDATA.ATLAS.9HK7.P5SI","article-title":"DAOD_PHYSLITE format 2015-2016 Open Data for Research from the ATLAS experiment","author":"ATLAS Collaboration","year":"2024"},{"key":"mlstade58fbib19","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.119.132003","volume":"119","author":"Larkoski","year":"2017","journal-title":"Phys. Rev. Lett."},{"key":"mlstade58fbib20","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevD.96.074003","volume":"96","author":"Tripathee","year":"2017","journal-title":"Phys. Rev. D"},{"key":"mlstade58fbib21","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevD.101.034009","volume":"101","author":"Komiske","year":"2020","journal-title":"Phys. Rev. 
D"},{"key":"mlstade58fbib22","author":"Dolan","year":"2023"},{"key":"mlstade58fbib23","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.6619768","article-title":"JetClass: a large-scale dataset for deep learning in jet physics","author":"Qu","year":"2022"},{"key":"mlstade58fbib24","doi-asserted-by":"publisher","DOI":"10.1088\/1748-0221\/3\/08\/S08004","volume":"3","author":"CMS","year":"2008","journal-title":"JINST"},{"key":"mlstade58fbib25","doi-asserted-by":"publisher","DOI":"10.7483\/OPENDATA.CMS.3S7F.2E9W","article-title":"Jet primary dataset in AOD format from RunB of 2010 (\/Jet\/Run2010B-Apr21ReReco-v1\/AOD)","author":"CMS Collaboration","year":"2014"},{"key":"mlstade58fbib26","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1088\/1742-6596\/664\/7\/072052","volume":"664","author":"CMS","year":"2015","journal-title":"J. Phys.: Conf. Ser."},{"key":"mlstade58fbib27","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/1525\/1\/012038","volume":"1525","author":"Peruzzi","year":"2020","journal-title":"J. Phys.: Conf. Ser."},{"key":"mlstade58fbib28","doi-asserted-by":"publisher","DOI":"10.1088\/1748-0221\/15\/10\/P10017","volume":"15","author":"CMS","year":"2020","journal-title":"JINST"},{"key":"mlstade58fbib29","doi-asserted-by":"publisher","DOI":"10.1088\/1748-0221\/12\/01\/P01020","volume":"12","author":"CMS","year":"2017","journal-title":"JINST"},{"key":"mlstade58fbib30","doi-asserted-by":"publisher","DOI":"10.1088\/1748-0221\/12\/10\/P10003","volume":"12","author":"CMS","year":"2017","journal-title":"JINST"},{"key":"mlstade58fbib31","doi-asserted-by":"publisher","first-page":"JHEP04(2008)063","DOI":"10.1088\/1126-6708\/2008\/04\/063","author":"Cacciari","year":"2008","journal-title":"J. High Energy Phys."},{"key":"mlstade58fbib32","doi-asserted-by":"publisher","first-page":"1896","DOI":"10.1140\/epjc\/s10052-012-1896-2","volume":"C72","author":"Cacciari","year":"2012","journal-title":"Eur. Phys. 
J."},{"key":"mlstade58fbib33","doi-asserted-by":"publisher","DOI":"10.1088\/1748-0221\/15\/09\/p09018","volume":"15","author":"CMS","year":"2020","journal-title":"JINST"},{"key":"mlstade58fbib34","doi-asserted-by":"publisher","first-page":"JHEP10(2014)059","DOI":"10.1007\/JHEP10(2014)059","author":"Bertolini","year":"2014","journal-title":"J. High Energy Phys."},{"key":"mlstade58fbib35","doi-asserted-by":"publisher","DOI":"10.1088\/1748-0221\/12\/02\/P02014","volume":"12","author":"CMS","year":"2017","journal-title":"JINST"},{"key":"mlstade58fbib36","article-title":"PFNano producer tool for CMS 2016","author":"CMS Collaboration","year":"2024"},{"key":"mlstade58fbib37","article-title":"Monte Carlo simulations of 2016 data","author":"CMS Collaboration","year":"2024"},{"key":"mlstade58fbib38","doi-asserted-by":"publisher","first-page":"JHEP05(2014)146","DOI":"10.1007\/JHEP05(2014)146","author":"Larkoski","year":"2014","journal-title":"J. High Energy Phys."},{"key":"mlstade58fbib39","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevD.110.030001","volume":"110","author":"Particle Data Group","year":"2024","journal-title":"Phys. Rev. D"},{"key":"mlstade58fbib40","doi-asserted-by":"publisher","first-page":"JHEP03(2011)015","DOI":"10.1007\/JHEP03(2011)015","author":"Thaler","year":"2011","journal-title":"J. High Energy Phys."},{"key":"mlstade58fbib41","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevD.101.056019","volume":"101","author":"Qu","year":"2020","journal-title":"Phys. Rev. D"},{"key":"mlstade58fbib42","author":"CMS","year":"2020"},{"key":"mlstade58fbib43","doi-asserted-by":"publisher","first-page":"JHEP07(2014)079","DOI":"10.1007\/JHEP07(2014)079","author":"Alwall","year":"2014","journal-title":"J. High Energy Phys."},{"key":"mlstade58fbib44","doi-asserted-by":"publisher","first-page":"159","DOI":"10.1016\/j.cpc.2015.01.024","volume":"191","author":"Sj\u00f6strand","year":"2015","journal-title":"Comput. Phys. 
Commun."},{"key":"mlstade58fbib45","doi-asserted-by":"publisher","first-page":"JHEP02(2014)057","DOI":"10.1007\/JHEP02(2014)057","author":"DELPHES 3","year":"2014","journal-title":"J. High Energy Phys."},{"key":"mlstade58fbib46","author":"Radford","year":"2018"},{"key":"mlstade58fbib47","author":"van den Oord","year":"2018"},{"key":"mlstade58fbib48","author":"Bao","year":"2022"},{"key":"mlstade58fbib49","author":"Huh","year":"2023"},{"key":"mlstade58fbib50","article-title":"GitHub repository","author":"Wright","year":"2019"},{"key":"mlstade58fbib51","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1038\/s41592-019-0686-2","volume":"17","author":"Virtanen","year":"2020","journal-title":"Nat. Methods"},{"key":"mlstade58fbib52","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevD.107.113003","volume":"107","author":"Krause","year":"2023","journal-title":"Phys. Rev. D"},{"key":"mlstade58fbib53","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevD.107.076017","volume":"107","author":"Kansal","year":"2023","journal-title":"Phys. Rev. D"},{"key":"mlstade58fbib54","doi-asserted-by":"publisher","first-page":"031","DOI":"10.21468\/SciPostPhys.16.1.031","volume":"16","author":"Das","year":"2024","journal-title":"SciPost Phys."},{"key":"mlstade58fbib55","author":"Hernandez","year":"2021"},{"key":"mlstade58fbib56","author":"Villalobos","year":"2023"},{"key":"mlstade58fbib57","author":"Batson","year":"2023"},{"key":"mlstade58fbib58","author":"Hestness","year":"2017"},{"key":"mlstade58fbib59","doi-asserted-by":"publisher","first-page":"JHEP 0411:040","DOI":"10.1088\/1126-6708\/2004\/11\/040","author":"Nason","year":"2004","journal-title":"J. High Energy Phys."},{"key":"mlstade58fbib60","doi-asserted-by":"publisher","first-page":"JHEP09(2007)126","DOI":"10.1088\/1126-6708\/2007\/09\/126","author":"Frixione","year":"2007","journal-title":"J. 
High Energy Phys."},{"key":"mlstade58fbib61","doi-asserted-by":"publisher","first-page":"JHEP06(2010)043","DOI":"10.1007\/JHEP06(2010)043","author":"Alioli","year":"2010","journal-title":"J. High Energy Phys."},{"key":"mlstade58fbib62","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1140\/epjc\/s10052-019-7499-4","volume":"80","author":"CMS","year":"2020","journal-title":"Eur. Phys. J. C"},{"key":"mlstade58fbib63","doi-asserted-by":"publisher","first-page":"136","DOI":"10.1016\/j.nuclphysb.2010.05.008","volume":"838","author":"Ball","year":"2010","journal-title":"Nucl. Phys. B"},{"key":"mlstade58fbib64","doi-asserted-by":"publisher","first-page":"JHEP04(2015)040","DOI":"10.1007\/JHEP04(2015)040","author":"NNPDF","year":"2015","journal-title":"J. High Energy Phys."},{"key":"mlstade58fbib65","doi-asserted-by":"publisher","first-page":"663","DOI":"10.1140\/epjc\/s10052-017-5199-5","volume":"77","author":"NNPDF","year":"2017","journal-title":"Eur. Phys. J. C"}],"container-title":["Machine Learning: Science and 
Technology"],"original-title":[],"link":[{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ade58f","content-type":"text\/html","content-version":"am","intended-application":"text-mining"},{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ade58f\/pdf","content-type":"application\/pdf","content-version":"am","intended-application":"text-mining"},{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ade58f","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ade58f\/pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ade58f\/pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ade58f\/pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ade58f\/pdf","content-type":"application\/pdf","content-version":"am","intended-application":"similarity-checking"},{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ade58f\/pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,7]],"date-time":"2025-07-07T07:01:20Z","timestamp":1751871680000},"score":1,"resource":{"primary":{"URL":"https:\/\/iopscience.iop.org\/article\/10.1088\/2632-2153\/ade58f"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,7]]},"references-count":65,"journal-issue":{"issue":"3","published-online":{"date-parts":[[2025,7,7]]},"published-print":{"date-parts":[[2025,9,30]]}},"URL":"https:\/\/doi.org\/10.1088\/2632-2153\/ade58f","relation":{"has-review":[{"id-type":"doi","id":"10.1088\/2632-2153\/ADE58F\/v2\/r
eview1","asserted-by":"object"},{"id-type":"doi","id":"10.1088\/2632-2153\/ADE58F\/v1\/decision1","asserted-by":"object"},{"id-type":"doi","id":"10.1088\/2632-2153\/ADE58F\/v1\/review2","asserted-by":"object"},{"id-type":"doi","id":"10.1088\/2632-2153\/ADE58F\/v2\/review2","asserted-by":"object"},{"id-type":"doi","id":"10.1088\/2632-2153\/ADE58F\/v1\/review1","asserted-by":"object"},{"id-type":"doi","id":"10.1088\/2632-2153\/ADE58F\/v2\/decision1","asserted-by":"object"},{"id-type":"doi","id":"10.1088\/2632-2153\/ADE58F\/v2\/response1","asserted-by":"object"}]},"ISSN":["2632-2153"],"issn-type":[{"value":"2632-2153","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7,7]]},"assertion":[{"value":"Aspen Open Jets: unlocking LHC data for foundation models in particle physics","name":"article_title","label":"Article Title"},{"value":"Machine Learning: Science and Technology","name":"journal_title","label":"Journal Title"},{"value":"paper","name":"article_type","label":"Article Type"},{"value":"\u00a9 2025 The Author(s). Published by IOP Publishing Ltd","name":"copyright_information","label":"Copyright Information"},{"value":"2025-01-22","name":"date_received","label":"Date Received","group":{"name":"publication_dates","label":"Publication dates"}},{"value":"2025-06-17","name":"date_accepted","label":"Date Accepted","group":{"name":"publication_dates","label":"Publication dates"}},{"value":"2025-07-07","name":"date_epub","label":"Online publication date","group":{"name":"publication_dates","label":"Publication dates"}}]}}