<!DOCTYPE html><html lang="en" xmlns="http://www.w3.org/1999/xhtml" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" style="font-size:16px;"><head></head><head><meta charset="utf-8"/><!--[if !mso]><!--><meta http-equiv="X-UA-Compatible" content="IE=edge"/><!--<![endif]--><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="x-apple-disable-message-reformatting"/><meta name="format-detection" content="telephone=no,address=no,email=no,date=no,url=no"/><meta name="color-scheme" content="light"/><meta name="supported-color-schemes" content="light"/><title>"3D" Attention Is Here!</title><!--[if mso]><xml><o:OfficeDocumentSettings><o:AllowPNG/><o:PixelsPerInch>96</o:PixelsPerInch></o:OfficeDocumentSettings></xml><![endif]--><style> :root { color-scheme: light; supported-color-schemes: light; } body { margin: 0; padding: 0; min-width: 100%!important; -ms-text-size-adjust: 100% !important; -webkit-transform: scale(1) !important; -webkit-text-size-adjust: 100% !important; -webkit-font-smoothing: antialiased !important; } .body { word-wrap: normal; word-spacing:normal; } table.mso { width: 100%; border-collapse: collapse; padding: 0; table-layout: fixed; } img { border: 0; outline: none; } table { mso-table-lspace: 0px; mso-table-rspace: 0px; } td, a, span { mso-line-height-rule: exactly; } #root [x-apple-data-detectors=true], a[x-apple-data-detectors=true], #MessageViewBody a { color: inherit !important; text-decoration: inherit !important; font-size: inherit !important; font-family: inherit !important; font-weight: inherit !important; line-height: inherit !important; } span.MsoHyperlink { color: inherit !important; mso-style-priority: 99 !important; } span.MsoHyperlinkFollowed { color: inherit !important; mso-style-priority: 99 !important; } .a { background-color:#dedede; } .b { background-color:#2a2a2a; } .c { background-color:#ffffff; } .d { background-color:#fff0c8; } .d2 { background-color:#FFFFFF; } .d3 { background-color:#FFFFFF; } h1 a { text-decoration:none;color:#2C81E5;font-style:italic; } h2 a { text-decoration:none;color:#2C81E5;font-style:italic; } h3 a { text-decoration:none;color:#2C81E5;font-style:italic; } h4 a { text-decoration:none;color:#2C81E5;font-style:italic; } h5 a { text-decoration:none;color:#2C81E5;font-style:italic; } h6 a { text-decoration:none;color:#2C81E5;font-style:italic; } h1, h1 a, h2, h2 a, h3, h3 a, h4, h4 a, h5, h5 a, h6, h6 a, ul, li, ol, p, p a { margin: 0;padding: 0; } h1 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:700;font-size:28px;color:#2A2A2A;line-height:42px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h2 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:700;font-size:24px;color:#2A2A2A;line-height:36px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h3 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:20px;color:#2A2A2A;line-height:30px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h4 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:18px;color:#2A2A2A;line-height:27px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h5 { font-family:'Trebuchet MS','Lucida 
Grande',Tahoma,sans-serif;font-weight:400;font-size:16px;color:#2A2A2A;line-height:24px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h6 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:14px;color:#2A2A2A;line-height:21px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } p { font-family:'Georgia','Times New Roman',serif;font-weight:400;color:#2D2D2D;font-size:16px;line-height:24px;padding-bottom:8px;padding-top:8px;mso-margin-top-alt:8px;mso-margin-bottom-alt:8px; } p a, .e a, ul a, li a, .h a, .h2 a, .h3 a { word-break:break-word;color:#2C81E5 !important;text-decoration:none;font-style:italic; } p a span, .e a span, ul a span, li a span { color: inherit } p .bold { font-weight:bold;color:#2D2D2D; } p span[style*="font-size"] { line-height: 1.6; } .f p { font-size:12px;line-height:15px;color:#2D2D2D;padding:0; } .f p a { color:#2D2D2D !important; } .g p { font-family:'Helvetica',Arial,sans-serif;font-size:14px;line-height:20px;font-weight:normal;margin:0; } .g p a { text-decoration: underline; } .i p { font-family:'Helvetica',Arial,sans-serif;line-height:23px;font-size:15px;color:#2D2D2D; } .i p a { color:#2D2D2D !important; } .i2 p { font-family:'Helvetica',Arial,sans-serif;line-height:23px;font-size:15px;color:#2D2D2D; } .i2 p a { color:#2D2D2D !important; } .i3 p { font-family:'Helvetica',Arial,sans-serif;line-height:43px;font-size:24px;color:#2D2D2D; } .i3 p a { color:#2D2D2D !important; } .h p a { color:#595959 !important; } .h2 p a { color:#595959 !important; } .h3 p a { color:#595959 !important; } .f p a, .i p a, .i2 p a, .i3 p a, .h p a, .h2 p a, .h3 p a { text-decoration:underline; } .j { border-top:3px solid #ffeb2d; } .k p { padding-left:15px;padding-bottom:0px;padding-top:6px;mso-margin-top-alt:6px;mso-margin-bottom-alt:0px;mso-margin-left-alt:15px; } .o { background-color:#FFFFFF;border:1px solid #F1F1F1;border-radius:5px; } .o p { font-family:'Helvetica',Arial,sans-serif;padding:0px;margin:0px; } .l p, .l p a { font-size:14px;line-height:20px;font-weight: bold;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; } .m p, .m p a { font-size:13px;line-height:18px;font-weight:400;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; } .n p, .n p a { font-size:12px;line-height:17px;font-weight:400;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; } .p { background-color:#FFFFFF;max-width:520px;border:1px solid #E1E8ED;border:1px solid rgba(80, 80, 80, 0.3);border-radius:5px; } .q { font-size:16px;font-family:Helvetica,Roboto,Calibri,sans-serif !important;border:1px solid #e1e8ed;border:1px solid rgba(80, 80, 80, 0.3);border-radius:10px;background-color:#FFFFFF; } .q p { font-size:16px;font-family:system-ui,Helvetica,Roboto,Calibri,sans-serif !important;color:#222222;padding:4px 0; } .r { border:1px solid #E1E8ED !important;border-radius:5px; } .s p { font-size: 14px; line-height: 17px; font-weight: 400; color: #697882; text-decoration: none; } .t p { font-family:'Helvetica',Arial,sans-serif;font-size:12px;line-height:18px;font-weight:400;color:#000000;font-style:italic;padding:4px 0px 0px; } .v { border-radius:10px;border:solid 0px #DFD150;background-color:#2C81E5;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;color:#FFFFFF; } .v a { text-decoration:none;display:block;color:#FFFFFF; } .w p { 
font-size:12px;line-height:15px;font-weight:400;color:#FFFFFF; } .w p a { text-decoration: underline !important;color:#FFFFFF !important; } ul { font-family:'Helvetica',Arial,sans-serif;margin:0px 0px 0px 25px !important;padding:0px !important;color:#2D2D2D;line-height:24px;list-style:disc;font-size:16px; } ul > li { font-family:'Helvetica',Arial,sans-serif;margin:10px 0px 0px 0px !important;padding: 0px 0px 0px 0px !important; color: #2D2D2D; list-style:disc; } ol { font-family:'Helvetica',Arial,sans-serif;margin: 0px 0px 0px 25px !important;padding:0px !important;color:#2D2D2D;line-height:24px;list-style:decimal;font-size:16px; } ol > li { font-family:'Helvetica',Arial,sans-serif;margin:10px 0px 0px 0px !important;padding: 0px 0px 0px 0px !important; color: #2D2D2D; list-style:decimal; } .e h3, .e p, .e span { padding-bottom:0px;padding-top:0px;mso-margin-top-alt:0px;mso-margin-bottom-alt:0px; } .e span, .e li { font-family:'Helvetica',Arial,sans-serif;font-size:16px;color:#2D2D2D;line-height:24px; } .rec { font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji" !important; } .rec__button:hover { background-color: #f9fafb !important; } .copyright a {color: inherit !important; text-decoration: none !important; font-size: inherit !important; font-family: inherit !important; font-weight: inherit !important; line-height: inherit !important;} .txt_social p { padding: 0; word-break: break-all; } .table, .table-c, .table-h { border: 1px solid #C0C0C0; } .table-c { padding:5px; background-color:#FFFFFF; } .table-c p { color: #2D2D2D; font-family:'Helvetica',Arial,sans-serif !important;overflow-wrap: break-word; } .table-h { padding:5px; background-color:#F1F1F1; } .table-h p { color: #2A2A2A; font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif !important;overflow-wrap: break-word; } @media only screen and (max-width:667px) { .aa { width: 100% !important; } .bb img { width: 100% !important; height: auto !important; max-width: none !important; } .cc { padding: 0px 8px !important; } .ee { padding-top:10px !important;padding-bottom:10px !important; } .ff ul, .ff ol { margin: 0px 0px 0px 10px !important;padding: 0px !important; } .ff li { margin:10px 0px 0px 10px !important; } .r {height:140px !important;} .s p { font-size:13px !important;line-height:15px !important; } .mob-hide {display:none !important;} .mob-stack {display:block !important;width:100% !important;} .mob-w-full {width:100% !important;} .mob-block {display:block !important;} .embed-img {padding:0px 0px 12px 0px !important;} .socialShare {padding-top:15px !important;} .rec { padding-left:15px!important;padding-right:15px!important; } .bodyWrapper { padding:7px 4px 7px 4px !important; } .social-mobile {float:left !important;margin-top:10px !important;} } @media screen and (max-width: 480px) { u + .a .gg { width: 100% !important; width: 100vw !important; } .tok-heart { padding-top:75% !important; } .tok-play { padding-top: 250px !important; } } @media screen and (max-width: 320px) { .tok-heart { padding-top:65% !important; } } .u { border: 1px solid #CACACA !important; border-radius: 2px !important; background-color: #ffffff !important; padding: 0px 13px 0px 13px !important; font-family:ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif !important;font-size: 12px !important; color: #767676 
!important; } .u a { text-decoration: none; display: block !important; color: #767676 !important; margin: 0px !important; } .u span, .u img { color: #767676 !important;margin:0px !important; max-height:32px !important;background-color:#ffffff !important; } </style><!--[if mso]><style type="text/css"> h1, h2, h3, h4, h5, h6 {font-family: Arial, sans-serif !important;} body, table, td, p, a, span {font-family: Arial, sans-serif !important;} sup { font-size: 100% !important;vertical-align: .5em !important;mso-text-raise: -1.5% !important;line-height: 0 !important; } ul { margin-left:0px !important; margin-right:10px !important; margin-top:20px !important; margin-bottom:20px !important; } ul li { margin-left: 0px !important; mso-special-format: decimal; } ol { margin-left:0px !important; margin-right:10px !important; margin-top:20px !important; margin-bottom:20px !important; } ol li { margin-left: 0px !important; mso-special-format: decimal; } li.listItem { margin-left:15px !important; margin-top:0px !important; } .paddingDesktop { padding: 10px 0 !important; } .edm_outlooklist { margin-left: -20px !important; } .embedImage { display:none !important; } </style><![endif]--><style> @font-face { font-family: 'Open Sans'; font-style: normal; font-weight: 700; font-display: swap; src: url('https://fonts.gstatic.com/s/opensans/v40/memSYaGs126MiZpBA-UvWbX2vVnXBbObj2OVZyOOSr4dVJWUgsg-1x4gaVIUwaEQbjA.woff2') format('woff2'); } @font-face { font-family: 'Open Sans'; font-style: italic; font-weight: 700; font-display: swap; src: url('https://fonts.googleapis.com/css2?family=Open+Sans:ital,wght@1,700&display=swap') format('woff2'); } </style></head><body class="a" style="margin:0px auto;padding:0px;word-wrap:normal;word-spacing:normal;background-color:#dedede;"><div role="article" aria-roledescription="email" aria-label="email_name" lang="en" style="font-size:1rem"><div style="display:none;max-height:0px;overflow:hidden;"> Read more about new the attention technique "3D Attention", plus more about RL experiments and hyperparameter sweep in this week's AI Timeline...  
‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ </div><table role="none" width="100%" border="0" cellspacing="0" align="center" cellpadding="0" class="gg"><tr><td align="center" valign="top"><table role="none" width="670" border="0" cellspacing="0" cellpadding="0" class="aa" style="width:670px;table-layout:fixed;"><tr><td class="bodyWrapper" align="center" valign="top" style="padding:7px 7px 7px 7px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="center" valign="top" style="border-width:0px 0px 0px 0px;border-style: solid; border-color: #2a2a2a;border-radius:10px 10px 0px 0px;background-color:#ffffff;" class="c"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr id="header"><td style="padding:28px 28px 0px 28px;"><div style="padding-top:0px;padding-right:0px;padding-bottom:20px;padding-left:0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td class="f" align="right" valign="top"><p> July 08, 2025 | <a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxSdB5RCIH6yy1Fm1CYma3Ey3_1o3vxzOyxgxmpk478tWxU7RLD8mqU3bIf3E7vZlhSf6gMcMqqG0L-BksfoGPDh2nrxAkGB08ZzbudDHBUIp1almy4JWjvQ4e9_tRmnLK9k5EhdvczAeDKv0ff1edeVtqmCvVmKFKP43YkIaDr07Lspph3W6EfQtjFr7sBUzHGDb_9DXNZawSGZwxgo3iDYoxKk0PhxN-R56DunTzvBhy7CDKIHJxJU_3IRb_6q20rFYCiqZifVXZowbEF2aUAxXeCoybaYdRMG2vuTZB2w-FFQNPcV5NMUdNfCs84cOF0ePXCVD43ybqRITXyTkQz2JuM1wUfHqsmXQv_Ul_AImU0XpVxnZdtEawYGt2lOVQpVfZUN2eHFsFvS1pfMtsl57y3Uq4d722Uxg-leXvTFZSy9A2cvJAGHpnW2dDbTSQ1U6V2PTRLZzNdrQMjDKw2cgLN3RGMlTqE0P4eJ2JeOB2i3Wpp0gF1JacxX1ARgGNEDp9-jNNOzSxPybNLjoiHXnj1_9I5U9WUZDsUiYnN-8THgzi79QkVTSyFNrjsebjhclYla3PU_QyVwemX0fQvQoksa4ybjgd3PPc0mQY9eqkVLQN67xQVKozrn0zh-WMP1lyX-1uxcECRWgBO79Q54/4i0/aCJz-ogfTc-OmNGA6T64eQ/h0/h001.3MvCzo-vFpQXXiAZ59D7wIWDAbkDn3YeF6CQ5znZ3L4">Read Online</a></p></td></tr><tr><td class="dd" align="center" valign="top" style="padding:15px 0;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="center" valign="top"><h1 style="text-align:left;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;font-weight:Bold;font-size:32px;color:#2A2A2A;padding:2px 0;line-height:38px;"> "3D" Attention Is Here! </h1><p style="text-align:left;font-family:'Helvetica',Arial,sans-serif;font-weight:normal;font-size:20px;color:#3E3E3E;padding:5px 0;line-height:24px;"> Read more about new the attention technique "3D Attention", plus more about RL experiments and hyperparameter sweep in this week's AI Timeline... 
</p></td></tr></table></td></tr><tr><td style="height:0px;width:0px;"><div style="height:1px;" data-open-tracking="true"> <img src="https://elink4f7.mail.bycloud.ai/ss/o/u001.3wmUuY8gEWd4_869a_eXcg/4i0/aCJz-ogfTc-OmNGA6T64eQ/ho.gif" alt="" width="1" height="1" border="0" style="height:1px !important;width:1px !important;border-width:0 !important;margin-top:0 !important;margin-bottom:0 !important;margin-right:0 !important;margin-left:0 !important;padding-top:0 !important;padding-bottom:0 !important;padding-right:0 !important;padding-left:0 !important;"/> </div></td></tr></table></div></td></tr><tr id="content-blocks"><td class="email-card-body" align="center" valign="top" style="padding-bottom:28px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td id="nov-18-th-nov-24-th-33-latest-ai-re" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h6 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:87.5%;"><i>June 30th ~ July 6th</i><br><i>#63 Latest AI Research Explained Simply</i></h6></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"></p></td></tr></table></td></tr></table></td></tr><tr><td id="industry-news-in-1-line" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;">🗞️ Industry News in 1 Line</h2></td></tr><tr><td style="padding-bottom:12px;padding-left:50px;padding-right:40px;padding-top:12px;" class="ee"><div style="margin-left:0px;" class="edm_outlooklist"><ol start="1" style="list-style-type:decimal;margin:0px 0px;padding:0px 0px 0px 0px;"><li class="listItem ultext"><p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;">♥ 1k</span></span> Google released <a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.9ggl6Mt0xphuuMReR5gVpTanKmQxfkMnn7OJKz54OPG3XkE2WSFsxvdw5sRm9k1negttf2oS-SuqLc1XI_TcbYX_tF2Ih8MuP1_68YkyoxO3mVVk3-CIlD9kcmChv3OfYQf4puzs6Tp1Gq5jkpQEvOam6BICcN3l4iQli_TEjd8/4i0/aCJz-ogfTc-OmNGA6T64eQ/h1/h001.h6hgQeQik-GnJVnTm3Ac4ruVhKDIWpiCYYxS2T16-gs" target="_blank" rel="noopener noreferrer nofollow"><span>Gemma 3n</span></a>, SoTA open models that is capable of running on mobile devices. This series has a 5B and 8B model, each can be ran on setup that typically requires a 4B model. 
</p><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:480px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/7bd6e60e-3ea4-422e-b7bd-b099e4732632/image.png?t=1751990468" alt="" height="auto" width="480" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:480px;"><p>Gemma 3n Chatbot Arena Score</p></td></tr></table></li><li class="listItem ultext"><p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;">♥ ???</span></span> Other than that, this week’s AI news is not really that exciting… </p></li></ol></div></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"></p></td></tr></table></td></tr></table></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="transparent" style="background-color:transparent;border-color:#2C81E5;border-style:solid;border-width:5px;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;"><span style="">The AI Timeline: Premium Insights</span></h2></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="color:rgb(34, 34, 34);font-family:Georgia, "Times New Roman", serif;font-size:16px;">Recently, we introduced a premium membership for The AI Timeline! 
</span></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="color:rgb(34, 34, 34);font-family:Georgia, "Times New Roman", serif;font-size:16px;">With the membership, you would receive exclusive insights/explainers into technical AI topics and monthly research trend reports that contains my insights & analysis of up to 40+ papers.</span></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="color:rgb(34, 34, 34);font-family:Georgia, "Times New Roman", serif;font-size:16px;">Check out the </span><span style="color:rgb(34, 34, 34);font-family:Georgia, "Times New Roman", serif;font-size:16px;"><b>Monthly Research Reports (~4000 words) </b></span><span style="color:rgb(34, 34, 34);font-family:Georgia, "Times New Roman", serif;font-size:16px;">here:</span></p></td></tr><tr><td align="center" valign="top" class="dd"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxSdB5RCIH6yy1Fm1CYma3EzJsyYxqCxPYVi1lUfEXmFGhbtmy_ZjMJuDYeLyDuZMEUVnXc37LYVUiyhrv6Go6KCsUvULh6AWaWx3y6GjLqhCtxHLZgm29M4N6iVhMhVVBc_H2dcD57IVTiSYV22AFLtDx5qC1UB89zifNRietx67HQ_wZnqFLanv6KweOvDtzq6S6HMmEqJSmcy0IVQJ_tShAkj-ZSrUiEAshGEVyIyuQJrHxhYkdQP3f9NFp237Zm2zCpgPYVyIdCFDXHTJi286CS_My1_PeZZHbTHqIU05jwMFxYZuaA6CPN4vl1N1U6VlX0bKVaPhY3oVf1ufM7LLmSkhckTgodrCO2Mh4NJARQkr2kPIc7eQOVHlkkIg9IdlHBrz5cNyMghI70qvyF33POl95O73oEY44p_M6JdQJQhCLChHZwta62Tk3nOeKOyxrUafzS3Rnv8D5d9Vwx-OAHvjWB9xHlKxHpLFD9FDnp-8t_3a51851dUWlVbWJwVZHzhfxzBTBfSGnz6eE31IU1Oa6-ktlqBvpRaAL-6YAAjyqkjLBNfy2Ys9JM0gh86eemD37pV6TbbEGbq-DERL6LIK-vNZoMT0bPvVjWMoRd-Th39kPwIgIS7i6LKTYfNh0h-3mBk5GOC_qnZaR74C8MCuOcJryJ1VrpcU6_DF/4i0/aCJz-ogfTc-OmNGA6T64eQ/h2/h001.jkQrJUI1hKf5HbfIheWb7bOP3L6XhwazQQ3ADMI63NA" style="text-decoration:none;" target="_blank"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center" style="margin-bottom:12px;margin-top:12px;padding-left:12px;padding-right:12px;"><tr><td align="center" valign="top" class="o" style="padding:12px 12px 12px 12px;;background-color:#FFFFFF;border-color:#F1F1F1;border-radius:5px 5px 5px 5px;border-width:1px 1px 1px 1px;"><table role="none" border="0" cellspacing="0" cellpadding="0" align="right" width="100%"><tr><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="embed-img mob-stack" align="center" valign="top" style="width:35%;min-height:100px;vertical-align:middle;display:none;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxSdB5RCIH6yy1Fm1CYma3EzJsyYxqCxPYVi1lUfEXmFGhbtmy_ZjMJuDYeLyDuZMEUVnXc37LYVUiyhrv6Go6KArkQbSfAHSKADAunbYaT05ErRchPGF5UnOu985e_5oWeb6cCALl3ZWEHY1H-zxrmDElSK1SUPKOS3XyczWnQQOG5kbWiIfjho4Gq9VmpVrAMsvGHDNamt3Shdt07052zdX5z-VrMegF48WHOxmGha-QDkgj6X9kdaikUVQnz_fqvvfS-8IsjkCavDRfbtQ1E3HCkpg66MeOUVXZwX7JmMqONPDuS9PMx4cVkcKGtiNWg4A1K4UaY9bi9Psp6XPpT7iJDFPAK9LuxOOpUePvqjqrWZMOFHPOBK3QMmD-f-s7-l3yku6XQEi212UwW6eHmCDq5D-PJGYQK4OTfqd9wnoqhtHy46RpTTA8P_RYu1MgPLkYecDDnrlpdvi4tdyWzQa5mV3M-JA7Q3Nyl6xTurhKBKjGmjKbueZLLMgQINCvQyF0gFx7ODVZAezqY3nL18863rkf4GE4b4eME9Bf7tSU726Jb5bxnn63Ry--8PiKySvl8HfGeYSuHELNO4wplVzJ4m47WExrRU6Wut4MqI8I4F9Lg7iVUSP4MT4FQ9xn81z9WFXCYFytZDi79kL6cDWtzrAk7l720DoaUpYvKml/4i0/aCJz-ogfTc-OmNGA6T64eQ/h3/h001.9sOAfoPsMTLclAzWuc6PBvZ8WzEQpLh5zHSURB_qvFg" style="text-decoration:none;" target="_blank"><img 
src="https://beehiiv-images-production.s3.amazonaws.com/uploads/asset/file/b7d2a67b-4ab0-422b-9dba-8908e114afb0/June_2025_research_trend_report.jpg?t=1751887513" width="100%" style="display:block;"/></a></td><!--[if mso]></tr></table></td><![endif]--></tr><tr><td align="center" valign="top" class="cc"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="left" valign="top" class="l"><p>June 2025 Research Trend Report</p></td></tr><tr><td align="left" valign="top" class="m"><p>Premium Insights: A recap of popular AI research papers and research trends in June 2025</p></td></tr><tr><td align="left" valign="bottom" class="n" style="vertical-align:bottom;padding-top:12px;"><p style="word-break:break-word;">mail.bycloud.ai/p/june-2025-research-trend-report-4b19de8c9019c9e7</p></td></tr></table></td><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="mob-hide" align="center" valign="top" style="width:35%;min-height:100px;padding:0px 0px 0px 12px;vertical-align:middle;"><img src="https://beehiiv-images-production.s3.amazonaws.com/uploads/asset/file/b7d2a67b-4ab0-422b-9dba-8908e114afb0/June_2025_research_trend_report.jpg?t=1751887513" width="100%" style="display:block;"/></td><!--[if mso]></tr></table></td><![endif]--></tr></table></td></tr></table></a></td></tr><tr><td align="center" valign="top" class="dd"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxSdB5RCIH6yy1Fm1CYma3EzRMZU7syrc4B7ZADDNO77fF1CP9dNVR_P9aEWpRp8MUJs4-GxSOtnm-_GrfclATvzA6KgzXx0OKmlBOTkzUXcWuUvIqb69Luo8oST5epIEF1L5eLtOK2SFiI5C3CpPyH3VHhZkI-03SC5hIK8UgmByEiDGXQuiV-Znlt-JJ0Gu7wEDeOhUxmm4r5loZVcwEr-MXaaxQdkuaels601-_c27OjDvPYJLOx8s_NOhDsjbCD1z9HX2Rbdxl3u9v6HT3Ue0nI6heuZcpt1jqTohRCzLlbNhuABWRLQxSK5wGnjcZ2hhzv9UaZaGd7bTg1t8Ncu3kXT-z8npL1rybvHI1CzehZnZhMm8U9JNEKBPTHwqTwGu6fDn6SEGWl6-g9TEyfSvKj1VZBubNjeV9FS-6uU94sZHpQJkEsO88VByJCFmvWccR8Rrdgrci8t5hRm1cvd_UfeO7LRm-ychk_pgob-cUjDM5mouMURzcqxBNJ11TgvqdvCK59iBO578OnteYPMbApc-IG9IU1kGyfjpazc6MDOyBqXlv7syW0-xSVcvyZmusKtfXtWQuyJUgtXwdSYe3qS9xxfMK0rNOlnutcT86wCoYbN4H13zC0iSEZwcJdWeitHKx5GVkoygKEjLXMg/4i0/aCJz-ogfTc-OmNGA6T64eQ/h4/h001.1WkJkHX2l_d3VAvA7e_-aE0IYIXHH0c4KQf1O9WQV9c" style="text-decoration:none;" target="_blank"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center" style="margin-bottom:12px;margin-top:12px;padding-left:12px;padding-right:12px;"><tr><td align="center" valign="top" class="o" style="padding:12px 12px 12px 12px;;background-color:#FFFFFF;border-color:#F1F1F1;border-radius:5px 5px 5px 5px;border-width:1px 1px 1px 1px;"><table role="none" border="0" cellspacing="0" cellpadding="0" align="right" width="100%"><tr><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="embed-img mob-stack" align="center" valign="top" style="width:35%;min-height:100px;vertical-align:middle;display:none;"><a 
href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxSdB5RCIH6yy1Fm1CYma3EzRMZU7syrc4B7ZADDNO77fF1CP9dNVR_P9aEWpRp8MUEodPNBwgKzPBQvZX63W3wlH_CQ84U-MKW9KM6tyxKLmBqgYIZojPTieGcmRSpceKN0OtF2rMRFtCnYPgtLRe1YDgiuz39x-ylsTcBQkkfQFNWKCpekfNhmgqA5W1D-xPaK5nHy2w9ol1fOu4RCwNqeyw-u5LGTObT4M80vbwqArekXVEGQpmZw_AcPsApUnFp7c49qmwoSZJevHrGTgLF5NbWgzEK68sj1z48I2JxGxLsjjcuQfAT7737q5hVjq_Jqa5yGnbH7js6V1RGLd_G4Ebo-tIJBZdag_Njw2GhSs6ilVKOi-d4kqtZL_Eh72mdTjp1F5ciIjs2434MRDFy85MayX47B1mIjodTq9DD51F0lpqmBcYn1114q9G53fYU6XEI7y_NgCm-ianrkJciRyGbJhm3Sw8FHpbIKU8E4zXSvjSmNmwNWxV-DGPBy0hhAr27oXR6V-2EaPB6NKHwCzx7P7pBMXt77BwxVVCE_0xPwcOWfEm1YJmtdn6jdpu5244S-5P2sIcGu-fquEDiEahhIOMG7jpdaQ8AGlpP1Fo6xgyvzCPA5SukaPwlN641SgqafSq6BBmEz8rgetq2c/4i0/aCJz-ogfTc-OmNGA6T64eQ/h5/h001.YrAq6Jzhl5Hw6odEQCujTtizf6BsZ8Q0Tbnyvfm4DIc" style="text-decoration:none;" target="_blank"><img src="https://beehiiv-images-production.s3.amazonaws.com/uploads/asset/file/ec385139-4e87-42bb-933d-9a33db719a7f/May_2025_research_trend_report.jpg?t=1748926968" width="100%" style="display:block;"/></a></td><!--[if mso]></tr></table></td><![endif]--></tr><tr><td align="center" valign="top" class="cc"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="left" valign="top" class="l"><p>May 2025 Research Trend Report</p></td></tr><tr><td align="left" valign="top" class="m"><p>Premium Insights: A recap of popular AI research papers and research trends in May 2025 </p></td></tr><tr><td align="left" valign="bottom" class="n" style="vertical-align:bottom;padding-top:12px;"><p style="word-break:break-word;">mail.bycloud.ai/p/may-2025-research-trend-report</p></td></tr></table></td><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="mob-hide" align="center" valign="top" style="width:35%;min-height:100px;padding:0px 0px 0px 12px;vertical-align:middle;"><img src="https://beehiiv-images-production.s3.amazonaws.com/uploads/asset/file/ec385139-4e87-42bb-933d-9a33db719a7f/May_2025_research_trend_report.jpg?t=1748926968" width="100%" style="display:block;"/></td><!--[if mso]></tr></table></td><![endif]--></tr></table></td></tr></table></a></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="color:rgb(34, 34, 34);font-family:Georgia, "Times New Roman", serif;font-size:16px;">Plus, we are also scheduling more technical explainers, so subscribe now to stay tuned!</span></p></td></tr><tr><td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"><a 
href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxcTFNXiisgoxAScOL72N4DHVytoeFQ-mBBAkS5Q6m2B0JsVl7Ceocxpip3rVxsdDNU-abgLsphn3E_51ocYe78bpPmEhUU8HruSVyMtxjK0WyvPFT05xO4_m-Ra5-m3Npyw7o8rdbMnw3uoa1js-7qg9pMmsDF_hTQE2h1ZdNzODcDkG8-1yM6Hi2VyipL8THPihgyqEqJND0cyPMecl23zZRt3gXstIGTWdXioVdpf-pq25mf1qHyL_lMj4ltcl4ymTGnWaLUKxCIkaObwp5iF-KZxbTswrOF4zFzs9smhc-xpfEC7IxJshhAhv7YbEOAsaJLvISKMurwRabw96Iwyhfqb_s2jv6KSFiZkYS3vza04g_Q0YoCZMbfLiUyeaaM-DcbzKWyQCY_TOcHuXGEdvWf4FSWOowAWk1Qw2kgezyeboO9qvPeScY2Qywm4vyGSPcA3yMciITREK_VX_34eqokiHRWjOHsFWdHpPsW8tdl0tXCi5pWkWjm2mx01WGWkKusAP2A1biwqmdFujQfvtM9zI78T9TpwiHJuCGQrS8MdhGlgbSvL4x5VuavlhoMwrXI_sIh6aXFqMZd46ofyO0FIlNPdN73vkAmu3-HQp/4i0/aCJz-ogfTc-OmNGA6T64eQ/h6/h001.eJddTjeAIY2dNye-0i4xQ0jo1aKEhKuW0J7qlBZ2k_s" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"> Upgrade To Premium Insights </a></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style=""><sub><a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.tLfGW26lAwaS9gFg17HSoGymQ3NNPtd5dE5MV_8UgjIDFPVXngz8pvQBldSW42yhUe_Qiq6DgEPMEBuPL9yfRpXelTiuu2kS8pLFvsoem_XoZoy_n13sTKUhZIbl0VH6/4i0/aCJz-ogfTc-OmNGA6T64eQ/h7/h001.7d8_zbabuxiwlse_hf8S3OOhv-b8_FsIoH8UtZzr9MU" target="_blank" rel="noopener noreferrer nofollow"><span>Advertise with The AI Timeline!</span></a></sub></span></p></td></tr></table></td></tr></table></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"></p></td></tr></table></td></tr></table></td></tr><tr><td id="the-automated-llm-speedrunning-benc" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;">The Automated LLM Speedrunning Benchmark: Reproducing NanoGPT Improvements</h2></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style=""><i>Zhao et al. 
[Meta, University of Edinburgh]</i></span></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;"> ♥ 1k </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="background-color:#e0e0e0;"><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> LLM Self-Improving </span></span></p></td></tr><tr><td id="introduction-to-nano-gpt" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Introduction to NanoGPT</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Scientific progress relies on trustworthy, reproducible results. But can AI agents actually help by reimplementing and building on existing research? A new benchmark tackles this by testing how well these agents reproduce improvements in training large language models. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The Automated LLM Speedrunning Benchmark uses the NanoGPT Speedrun, a community effort that reduced GPT-2 training times from 45 minutes to under 3 minutes. However, even though the researchers provided detailed hints that described each improvement, current AI agents struggle to match human innovations. This gap indicates that automated reproducibility is a roadblock for AI-driven science. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/5be3e506-700f-4af5-8d65-e1101f5e62dd/image.png?t=1751988278" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>The Automated LLM Speedrunning Benchmark</p></td></tr></table></td></tr><tr><td id="verifying-scientific-claims-using-l" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Verifying Scientific Claims using LLMs</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The researchers evaluated agents on a benchmark of 19 tasks, each of which requires the agent to speed up training starting from a previous record’s code. Agents receive hints in three formats: pseudocode, plain-text descriptions, or mini-papers summarizing changes. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/44d602aa-0f59-413e-844d-f4d1059e65d4/algo_optimizer.png?t=1751988337" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> They operate within a flexible scaffold that iteratively generates, tests, and refines code solutions. This scaffold branches into multiple variations, like Flat (testing many independent ideas) or Multi-AIDE (debugging and improving top candidates). At each step, the agent modifies the training script, runs it on fixed hardware, and analyzes results to guide the next attempt. The process emphasizes practical adjustments, such as optimizing algorithms or hardware usage, without demanding deep theoretical expertise. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> After this, the researchers connect these tasks sequentially. The benchmark mirrors real-world research where each advance builds on the last. For example, later tasks might require implementing attention optimizations or mixed-precision training. The scaffold’s design allows agents to learn from failures; buggy solutions trigger debugging, while promising ones spawn further refinements. This structure tests not just coding skill but also how agents handle compounding innovations, a key aspect of scientific progress. </p></td></tr><tr><td id="results-and-evaluations" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Results and Evaluations</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Agents were evaluated using Fraction of Speedup Recovered (FSR), measuring how much of a human record’s training-time improvement they replicated. Without hints, performance was poor, as all models recovered ≤20% of the speedup. Even with hints, results varied: top models like o3-mini achieved 40-46% FSR using pseudocode or combined hints, while others like Gemini-2.5-Pro lagged. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/20d10ad3-f889-4630-857d-74bc8f26a88c/nanogpt_speedrun51.png?t=1751988322" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> We noticed that the performance dropped sharply for later, more complex records, and code-similarity analysis showed agents often missed key changes. Surprisingly, adding external documentation sometimes hurt performance, suggesting agents struggle to integrate new knowledge. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/098c7b8c-cad0-4de5-8ad7-8c989ae4febf/fig_tuned_nanogpt.png?t=1751988367" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> When the agents built on their own prior solutions, these initial gains faded quickly, and by the third task, speedups vanished. These results highlight a limitation: current agents can’t reliably chain improvements like humans. The benchmark reveals reproducibility as a critical bottleneck for autonomous research. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.fUNb4GdFo9D3F8WuLArtoV5sElgytBlvJRzI9WtI92aTaZdR9PZL1lPAfAbTRvOOqzv15UmKmeUPCJsJoBIYI-iViQMxGpisbl8WSFsTyBS0m0N9hm6YMQ3uTutu1r5y/4i0/aCJz-ogfTc-OmNGA6T64eQ/h8/h001.bPI4BYOQ_u5of9RAGwhY41FPFin4nqVVtVxjffPywzE" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"> Read Full Paper </a></td></tr></table></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"></p></td></tr></table></td></tr></table></td></tr><tr><td id="fast-and-simplex-2-simplicial-atten" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;">Fast and Simplex: 2-Simplicial Attention in Triton</h2></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style=""><i>Roy et al. [Meta, University of Texas at Austin]</i></span></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;"> ♥ 1k </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="background-color:#e0e0e0;"><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> Attention </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span></p></td></tr><tr><td id="introduction-to-2-simplicial-transf" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Introduction to 2-simplicial Transformers</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Large language models are advancing rapidly, but they face a growing challenge: the shortage of high-quality training data. Current scaling laws suggest models need ever more tokens as they grow larger, but internet-scale datasets are nearing their limits. This creates a token efficiency problem, how can we achieve better results without endless data? Enter the 2-simplicial Transformer. 
</p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> This architecture uses a new attention mechanism to extract more value from each token. This is especially useful for those tasks that require complex reasoning like math and coding. Instead of relying on standard dot-product attention, it introduces a higher-order approach that fundamentally changes how models scale under token constraints. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/d5a669a2-7616-4fea-a1bb-9fbf4363f29d/image.png?t=1751987912" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td id="inner-workings-of-the-2-simplicial-" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Inner workings of the 2-simplicial Transformer</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The researchers of this study decided to replace the standard bilinear attention mechanism with a trilinear one. Where traditional attention computes pairwise interactions (query-key), the 2-simplicial version adds a third dimension. Each query now interacts with two keys simultaneously through a three-way tensor, capturing more nuanced relationships in sequences. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> This allows the model to identify patterns that bilinear attention might miss, such as logical dependencies across multiple tokens. The trilinear operation involves multiplying queries with two distinct key projections, then applying a softmax across the combined dimensions before merging value vectors. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/eda12d9d-365d-4e39-b129-78e953e7d236/image.png?t=1751987963" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>Visualization of sliding window 2-simplical attention and Tiling to reduce 2-simplicial einsum</p></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> To manage computational costs, the approach uses localized attention windows. Rather than processing full sequences, which would scale cubically, each query only attends to a limited neighborhood of keys. 
For example, a window size of 512 × 32 balances efficiency and coverage, keeping latency comparable to standard attention at 48k context lengths. A custom Triton kernel optimizes this by tiling operations across GPU cores and tensor cores, achieving near-peak hardware utilization. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/3772d7e4-c2da-47cd-8877-a0eb9aadaa2a/image.png?t=1751987935" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The architecture integrates these layers sparingly, using them in every fourth block to distribute computational load. Combined with grouped query attention, this maintains training stability while adding minimal overhead compared to dense attention variants. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/6379caca-37a4-4797-8ff4-042f086a821f/image.png?t=1751988069" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td id="evaluation-and-implications-of-2-si" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Evaluation and implications of 2-simplicial Transformers</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The researchers tested this method on reasoning-heavy benchmarks like GSM8K and MMLU, where larger 2-simplicial models (3.5B active parameters) showed consistent gains over standard Transformers. The negative log-likelihood <span style="font-weight:700;"><b>improved by 2.27%</b></span> on math tasks and 2.15% on complex question answering. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> This indicates a better grasp of underlying patterns. Additionally, scaling law analysis revealed a steeper exponent for parameter efficiency, meaning that for fixed token budgets, performance improves faster as models grow. This contrasts with Chinchilla-style scaling, which requires proportional token increases. 
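</p><p style="mso-line-height-alt:150.0%;"> As a rough illustration of what a steeper exponent means (with made-up constants, not the paper’s fitted values): if a loss term scales as A / N^alpha in the parameter count N, doubling N shrinks that term by a factor of 2^(-alpha), so a larger alpha buys more improvement per doubling at a fixed token budget. </p><pre style="font-family:Consolas,Menlo,monospace;font-size:13px;line-height:19px;color:#2D2D2D;background-color:#F6F6F6;border:1px solid #E1E8ED;border-radius:5px;padding:12px;margin:8px 0;overflow-x:auto;white-space:pre;">
# Toy illustration of a steeper parameter-scaling exponent (made-up values,
# not the paper's fit): a loss term A / N**alpha shrinks by 2**-alpha each
# time the parameter count N doubles, so a larger alpha improves faster.
for alpha in (0.50, 0.58):
    shrink = 2 ** -alpha
    print(f"alpha={alpha:.2f}: doubling N multiplies the scaling term by {shrink:.3f}")
# alpha=0.50 -> 0.707, alpha=0.58 -> 0.669 (faster decay for the steeper exponent)
</pre><p style="mso-line-height-alt:150.0%;">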
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/0d579b07-2661-4f01-beae-7b86f1d45d9e/image.png?t=1751988093" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>Negative log-likelihood of Transformer versus 2-simplicial attention.</p></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> These results suggest a new way to improve LLMs when enough data isn't present. This architecture can extract richer signals per token. In future, it could be used to explore hybrid attention hierarchies or refined windowing strategies. For now, the 2-simplicial Transformer shows us that rethinking attention mechanisms, not just scaling data, might unlock the next leap in reasoning capabilities. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/e2576e6d-2144-4272-9090-d88f5a26ea80/image.png?t=1751988166" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>R2 and residuals measuring goodness of fit.</p></td></tr></table></td></tr><tr><td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.fUNb4GdFo9D3F8WuLArtoV5sElgytBlvJRzI9WtI92akr3ETxPEnUepN6xC7Wsus0lOpKkFZynNtA5VZSDcmrGLbn6JIBIFSOmP4kVrT9TD1ff1QvUX0DOkDXq8pwW5f/4i0/aCJz-ogfTc-OmNGA6T64eQ/h9/h001.MiSOOqAUDU2YoFoP_R27_QMGK5TnqeNNfWSaAnBxaZg" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"> Read Full Paper </a></td></tr></table></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p 
style="mso-line-height-alt:150.0%;"></p></td></tr></table></td></tr></table></td></tr><tr><td id="does-math-reasoning-improve-general" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;">Does Math Reasoning Improve General LLM Capabilities? Understanding Transferability of LLM Reasoning</h2></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style=""><i>Xing et al. [CUHK, CityU, Tencent AI Lab]</i></span></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;"> ♥ 820 </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="background-color:#e0e0e0;"><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> LLM Training </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="background-color:#e0e0e0;"><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> bycloud’s pick </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span></p></td></tr><tr><td align="center" valign="top" class="dd"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.CxDkkVpJsBdVoe83c_tBWgtnzmc5iIdrufGflHNXodhFV5oIrYR49sPZqEu6R6SbC4ZZkDJOayEapM5fva6GhCwpTDaVDa1OmN-Hl0EoG6vn5eg--VlayIWtkK2XHRj3d7cqWkemySJdCfGoEbCuaQ/4i0/aCJz-ogfTc-OmNGA6T64eQ/h10/h001.P0u56cNf13ykziSnDho_XzQrRK8dbHZo8DHnAV1J1I8" style="text-decoration:none;" target="_blank"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center" style="margin-bottom:12px;margin-top:12px;padding-left:12px;padding-right:12px;"><tr><td align="center" valign="top" class="o" style="padding:12px 12px 12px 12px;;background-color:#FFFFFF;border-color:#F1F1F1;border-radius:5px 5px 5px 5px;border-width:1px 1px 1px 1px;"><table role="none" border="0" cellspacing="0" cellpadding="0" align="right" width="100%"><tr><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="embed-img mob-stack" align="center" valign="top" style="width:35%;min-height:100px;vertical-align:middle;display:none;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.CxDkkVpJsBdVoe83c_tBWgtnzmc5iIdrufGflHNXodhFV5oIrYR49sPZqEu6R6Sbd2t48fDVYbBtinB-E0oyLZ-cXV8ADcojl0qxjp-SiL3pYq25MFUHzPsvmCRGOEFoSWnvvopULjPIyvvS2AQ6vQ/4i0/aCJz-ogfTc-OmNGA6T64eQ/h11/h001.B99M8JaFrtlQDIEx9B72D3fYWYt2nKZTth5veQ0-704" style="text-decoration:none;" target="_blank"><img src="https://cdn-thumbnails.huggingface.co/social-thumbnails/ReasoningTransferability.png" width="100%" style="display:block;"/></a></td><!--[if mso]></tr></table></td><![endif]--></tr><tr><td align="center" valign="top" class="cc"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="left" valign="top" class="l"><p>ReasoningTransferability (Reasoning Transfer)</p></td></tr><tr><td align="left" valign="top" class="m"><p>Org profile for Reasoning Transfer on Hugging Face, the AI community building the future.</p></td></tr><tr><td align="left" valign="bottom" class="n" style="vertical-align:bottom;padding-top:12px;"><p 
style="word-break:break-word;">huggingface.co/ReasoningTransferability</p></td></tr></table></td><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="mob-hide" align="center" valign="top" style="width:35%;min-height:100px;padding:0px 0px 0px 12px;vertical-align:middle;"><img src="https://cdn-thumbnails.huggingface.co/social-thumbnails/ReasoningTransferability.png" width="100%" style="display:block;"/></td><!--[if mso]></tr></table></td><![endif]--></tr></table></td></tr></table></a></td></tr><tr><td id="introduction-to-transferability-of-" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Introduction to Transferability of Reasoning in LLMs</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Large language models have made impressive strides in solving math problems, with new models regularly topping leaderboards on benchmarks like MATH and AIME. But as these scores improve, people have started wondering whether these gains reflect genuine reasoning skills that transfer to other domains, or are models simply overfitting to narrow mathematical patterns? </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> This paper tackles that puzzle by evaluating over 20 reasoning-tuned models across diverse tasks, from scientific QA and coding to conversational dialogue and instruction following. Surprisingly, most models excelling at math failed to generalize their abilities elsewhere. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/445d78cf-441a-4fe3-86a0-7f01f19c02c5/image.png?t=1751987146" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td id="mechanism-behind-transferability-di" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Mechanism Behind Transferability Differences</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> To isolate the impact of fine-tuning methods, researchers conducted controlled experiments using Qwen3-14B models trained exclusively on math data. The researchers compared two approaches: supervised fine-tuning (SFT), where models learn from pre-written solutions, and reinforcement learning (RL), where models optimize for correct answers through trial and error. Both methods improved math performance, but they diverged dramatically elsewhere. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/c62e3a61-75e0-4ebc-b57b-6716257f0a6c/image.png?t=1751987744" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>Transferability of mathematical reasoning to other reasoning and non-reasoning tasks.</p></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> SFT models showed significant "catastrophic forgetting": their general capabilities eroded. When the researchers tested them on non-math queries, their internal representations drifted substantially. The researchers measured this drift using principal component analysis on latent activations, which revealed distorted feature spaces that disrupted general task performance. Token distribution analysis further showed SFT models shifting probabilities erratically across many irrelevant tokens, for example injecting logical operators into simple email-writing tasks. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The RL models, on the other hand, maintained stable representations. Their latent spaces stayed aligned with the base model, preserving versatility. Token shifts were minimal and targeted: only math-relevant terms like "add" or "define" changed during reasoning tasks, while everyday language remained intact. This selective adaptation allowed RL models to extend math gains to coding puzzles or medical QA without compromising conversational ability. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/91f2068e-e8a8-4960-984d-56746aba0173/image.png?t=1751987824" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td id="performance-and-implications-of-tra" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Performance and Implications of Transferable Reasoning</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The performance numbers were striking. On math tasks, RL models slightly outperformed SFT (53.8% vs. 49.8% average). But the real gap emerged elsewhere: RL surged ahead <span style="font-weight:700;"><b>by 17.1% on coding benchmarks</b></span> and <span style="font-weight:700;"><b>24% on non-reasoning</b></span> tasks like email drafting, while SFT models regressed. 
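</p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> To make the representation-drift analysis described above more concrete, here is a minimal sketch of a PCA-based comparison. The activation matrices are random placeholders standing in for hidden states collected from the base and fine-tuned models on the same non-math prompts, and the paper’s exact procedure may differ in detail. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><pre style="font-family:'Courier New',Courier,monospace;font-size:13px;line-height:18px;color:#2D2D2D;background-color:#F6F6F6;border:1px solid #E1E8ED;border-radius:5px;padding:12px;margin:8px 0;overflow-x:auto;white-space:pre;">
# Rough sketch: measure how far fine-tuned activations drift inside the
# base model's principal-component space (placeholder data, not the paper's).
import numpy as np

rng = np.random.default_rng(0)
n_prompts, d_model, k = 512, 64, 2       # k = number of principal components kept

base_acts = rng.normal(size=(n_prompts, d_model))                          # base-model hidden states
tuned_acts = base_acts + rng.normal(scale=0.5, size=(n_prompts, d_model))  # drifted states

def top_pcs(x, k):
    """Top-k principal directions of mean-centered activations."""
    x = x - x.mean(axis=0)
    _, _, vt = np.linalg.svd(x, full_matrices=False)
    return vt[:k]                        # shape (k, d_model)

pcs = top_pcs(base_acts, k)              # fit the PCA basis on the base model only
center = base_acts.mean(axis=0)
proj_base = (base_acts - center) @ pcs.T
proj_tuned = (tuned_acts - center) @ pcs.T

# Drift = how far the fine-tuned activations move within the base PC space.
drift = np.linalg.norm(proj_tuned.mean(axis=0) - proj_base.mean(axis=0))
print(f"mean shift in base PC space: {drift:.3f}")
</pre></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> 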
The Transferability Index, a metric quantifying cross-domain generalization, confirmed RL’s edge with positive gains across all categories, while SFT models scored negatively on non-reasoning work, losing up to 41% of their performance. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/ebfa2a4c-6bad-4e6e-aad8-a96b08fac3fa/image.png?t=1751987781" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> These results challenge common practice: SFT is useful for specialized tasks, but it risks fragmenting a model’s core capabilities. RL’s on-policy learning, while computationally heavier, anchors improvements to the model’s existing knowledge, making reasoning gains portable. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> In practice, this means developers should prioritize RL when building general-purpose assistants. Future work could explore hybrid methods, but one lesson is clear: true reasoning isn’t just solving equations; it’s adapting those skills to the messy, multifaceted world beyond the chalkboard. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.fUNb4GdFo9D3F8WuLArtoV5sElgytBlvJRzI9WtI92ZZa1lQliaulnF2e_O6S2wPtJc704ywsKUNEaRsGp_PS1GXqhBjKSEXScSPWSvztWDNR4v3eZztScc2x0Gpsfp0/4i0/aCJz-ogfTc-OmNGA6T64eQ/h12/h001.KP6ShQD-yFvEpYzLFYiIAgEvNCjRlnGwr6_XLR0f4F0" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"> Read Full Paper </a></td></tr></table></td></tr><tr><td class="dd" style="padding: 20px;"><table width="100%" cellpadding="0" cellspacing="0" role="none" style="max-width:520px;margin:0 auto;"><tr><td class="q" style="padding:16px 16px 6px 16px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.tLfGW26lAwaS9gFg17HSoDDFT6eh5Nsg0xYVQj-h6I3o9m2k79_qw4izMYhmcI36Qr6VMQeHEBZlbsUFK9MetMvxPAdnshzanCKIEiJAxBrVQKTGQ3etjgcQg-U5Wz9bEcMsKM3Zsk5Ir0WA7trW_lCxf7IVkrx7LlqrgRg1pxg/4i0/aCJz-ogfTc-OmNGA6T64eQ/h13/h001._imu-1RPZTS51KdBv5Xwg1toV-w8clZGSYPOckKtPDw" style="text-decoration:none !important;"><table width="100%" cellpadding="0" cellspacing="0" border="0" role="none"><tr><td width="100%" style="padding: 0 0 14px 
0;text-decoration:none;width:100%;"><table width="100%" cellpadding="0" cellspacing="0" border="0" role="none"><tr><td width="36" style="width:36px;"><img src="https://pbs.twimg.com/profile_images/1698572487909400576/BvncwnrP_normal.jpg" alt="tw profile: The AI Timeline" style="display:block;width:36px;height:36px;border-radius:50%;border:0;"/></td><td width="400" style="padding:0 0 0 8px;text-decoration:none;"><span style="display:block;font-size:14px;color:#1c2022;font-weight:700;"> The AI Timeline </span><span style="display:block;color:#697882;font-size:14px;"> @TheAITimeline </span></td><td width="24" align="right" style="vertical-align:text-top;"><img width="24" height="24" loading="lazy" alt="tw" style="border:0;" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/x_logo.png"/></td></tr></table></td></tr><tr></tr><tr><td style="word-break:break-word;"><p>This week's top AI/ML research papers:</p><p>- 2 Simplicial Attention <br>- UMA <br>- Transition Matching <br>- GLM-4.1V-Thinking <br>- The Trilemma of Truth in LLMs <br>- Do Vision-Language Models Have Internal World Models? <br>- The Automated LLM Speedrunning Benchmark <br>- RoboScape <br>- Test-Time Scaling with</p></td></tr><tr><td style="padding:12px 0 0 0;"></td></tr><tr><td align="center" style="padding:8px 0 0 0;width:480px;"><img src="https://pbs.twimg.com/media/GvJ4GTfaAAAWx7K.jpg" width="480" height="auto" style="display:block;border:1px solid #E1E8ED;border-radius:5px;width:100%;max-width:480px;height:auto;"/></td></tr><tr><td height="8" style="line-height:1px;font-size:1px;height:8px;"> </td></tr><tr><td align="left" valign="top" class="s"><p>6:36 AM • Jul 6, 2025</p></td></tr><tr><td height="10" style="line-height: 1px; font-size: 1px; height: 10px;"> </td></tr><tr><td height="1" bgcolor="#e1e8ed" style="line-height:0px;font-size:0px;height:1px;"></td></tr><tr><td height="10" style="line-height:1px;font-size:1px;height:10px;"> </td></tr><tr><td align="left" valign="top" class="s"><p><b style="color:#1C2022">556</b> Likes <b style="color:#1C2022">72</b> Retweets </p></td></tr><tr><td align="left" valign="top" class="s"><div align="center" style="text-align:center;margin-top:4px;margin-bottom:4px;padding:8px;border:1px solid #ccd6dd;border-radius:9999px;color:#1B95E0"><b>3 Replies</b></div></td></tr></table></a></td></tr></table></td></tr></table></td></tr></table></td></tr><tr><td align="center" valign="top"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td><tr><td class="b" align="center" valign="top" bgcolor="#2a2a2a" style="padding:0px 0px 0px 0px;border-style:solid;border-width: 0px 0px 0px 0px;border-color: #2a2a2a;border-bottom-left-radius:10px;border-bottom-right-radius:10px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="center" valign="top" bgcolor="#73ddff" style="padding:12px"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td><span style="padding-left:1px;"></span></td><td align="center" valign="middle" width="75" style="width:75px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.1muhFWIqieRYpaJ-FbWSCQqcWoV4NNHHr5SkP9THApWuHAAlWLQxI3Q_IqFmt_DcyAxeC8jDApCnHmMSBGpBb5sgtimvBYgxRX-Rp7s0F3LjCHoSwdhr83OBqRFhJ1y_/4i0/aCJz-ogfTc-OmNGA6T64eQ/h14/h001.Un8xTqF2-hM9iZaXVUGx9_V9McY0Yt2Z5i6nnX5K-74" style="text-decoration:none;"><img width="22" height="22" alt="tw" border="0" 
style="display:block;max-width:22px;color:Dark" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/x_dark.png"/></a></td><td align="center" valign="middle" width="75" style="width:75px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.amatuKKICSickUKplYJXmBoQnQ9VXnB2zTxBG4HeHBgjMqVxpoXRdj01cjwyoVlHgiebEOgBvwHtevoVpsSvpn3Q1di2ml6sb3cBM-X6IStQbj_zQSVGWJ8AAmPw2en2/4i0/aCJz-ogfTc-OmNGA6T64eQ/h15/h001.r3RhuH4irWXQrUpWYdA22c4Af_3m6ksibljXEK8o8dg" style="text-decoration:none;"><img width="22" height="16" alt="yt" border="0" style="display:block;max-width:22px;color:Dark" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/youtube_dark.png"/></a></td><td><span style="padding-left:1px;"></span></td></tr></table></td></tr><tr><td height="10" style="line-height:1px;font-size:1px;height:10px;"> </td></tr><tr><td class="w" align="center" valign="top" style="padding:15px 15px 15px 15px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="center" valign="top"><p style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"> Update your email preferences or unsubscribe <a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxWc4htTObwdorovK0nFHVH-4pUdVE0ELYH5DsNemk732SjNwhPNJ25r0O8B5vYifsBhEpz-DJgyVFmavJPa0OyKRRnvw4o7XGyvIv7PRofnmqC0vZb3LccumJl3xbFv6AhRT2fBIfXAasMoTMrnqPBePXDvwDVOGT3JIl0PhYvWXkKeNIJXdRc2px7z7PrC_Yg5rzyIWu81h3nd9dCP779B8wh7ryl1h1d-9_Aceg6lVWJo1srKKQ0Y53owul3Yh6ANtm8DkLFMOWfGK-mTraFSnqfB1cevYsJ2oEFsQN-GmeKhX6Tc-GEtGqiiUuCJRaP1WtBKjy-Ei0IdljaxQ2NUaEJtUqouTMlx-DIcEGfOoxB2hET6aN1ujewiYE_-XjtQ-sDWLqH8Zch4dvyJtJmjsGEAlQjiTzSpO7x0oiMXGRJjIpQeChTBeWuYqzHHKZt-BZvh5gRipLtXWbeoVITLopo9cUpWsNzr5gGIJnprcJn1oNnsmPGQDss8TxXnDY3U0U-3A0_XtmQ_l6mciE_aUDGcHJcbCIasCcMwxaigfB5L88_ye1D48tpVeUez2rG-Udme5xAlgBf21D6xYT0w0d2uB6vYTg5BN-ApRpFdf4wrrT6JqcpQSi9HyQYmTY0m7OnTgTDxdJJI41NlvML1NDzjiKRIYutLItQ-Ak2ZdMQyLrCeJdwrH9-CFx4NZqT6p_doTZdPy-pgqZC3TjCQY2dnQkT1VvNnTzjlMfKMLPY3gbHi7aYZuQvbx_O-StllzqdYSkndx690nhaoyibtuy1oAzH8I0Bsuph12bJCZ-cKduW3G6b8TxQ2Txyh_SQ/4i0/aCJz-ogfTc-OmNGA6T64eQ/h16/h001.5lW49X27GZ22GM3fmxMLb-8jpOAwSs6_UYUv3pU6BD8" style="text-decoration:underline;text-decoration-color:#FFFFFF!important;color:#FFFFFF!important;"> here</a></p><p class="copyright" style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"> © 2025 bycloudai </p><p style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"> 228 Park Ave S, #29976, New York, New York 10003, United States </p></td></tr><tr style="display: table-row !important;"><td align="center" valign="top" style="padding-top:20px;" style="display:table-cell !important;"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="display:table !important;"><tr style="display:table-row !important;"><td class="u" align="center" valign="middle" height="32" style="height:32px;display:table-cell !important; max-height: 32px !important;margin:0px !important; background-color: #ffffff !important;"><a style="line-height:32px !important;text-decoration:none;display:block !important;" 
href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28olDWFpV5DDKfdk_OdOKOiiR_58nDByOA42iKMTZ5LbvJlnCq979UAUA_08YdKzRadyG7ErnSw39gMgF_F3cvrYUSv5NvjInuVaB1HMUohTA4LPseQoMu87xfH0nFN4OJxU_c9qRE3fgm2BgeVKhh7AFJmJ18j5SR0Na8awSg1OrJ0_F7jcnpsnTcjBIeDxPbYC-b06Xe1WrTmcqW7Exn_VA2CSGOy6CFS9dYf40uAH/4i0/aCJz-ogfTc-OmNGA6T64eQ/h17/h001.ilmlazWA5mg5MS-4qb8MXg4L_r8JOANbY8aqGvWzm2Y"><img src="https://media.beehiiv.com/output-onlinepngtools.png" width="16" alt="beehiiv logo" style="display:inline-block !important;max-width:16px !important; vertical-align:-3px !important;width: 16px !important;" border="0"/><span style="padding-left:11px !important;display: inline-block !important;">Powered by beehiiv</span></a></td></tr></table></td></tr><tr><td align="left" valign="top" height="2" style="height:2px;"><a href='https://elink4f7.mail.bycloud.ai/ss/c/u001.CxDkkVpJsBdVoe83c_tBWsHIaP4XNp0WgUYqLvHcKk_3uqk_KIkz4ddLinhFbud6JuxLFdSUhYnR7b1NSsmbtzXNGNblnEEMKUtkCAjkn8Y/4i0/aCJz-ogfTc-OmNGA6T64eQ/h18/h001.D-CGE6eTRdsAPSiZFS_wkLBgARS-nh-bLRMpmGrtEfU' style="color: #2a2a2a !important; cursor: default; font-size: 1px; text-decoration: none;"> Terms of Service </a></td></tr></table></td></tr></table></td></tr></td></tr></table></td></tr></table></td></tr></table></td></tr></table></div></body></html>