Email: Defeating Nondeterminism in LLM Inference

Subject:
Defeating Nondeterminism in LLM Inference
Received:
9/16/2025, 6:41:49 PM
From:
"The AI Timeline" <weekly@mail.bycloud.ai>
To:
desert.glitter.81@getmynews.app
&lt;!DOCTYPE html&gt;&lt;html lang="en" xmlns="http://www.w3.org/1999/xhtml" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" style="font-size:16px;"&gt;&lt;head&gt;&lt;/head&gt;&lt;head&gt;&lt;meta charset="utf-8"/&gt;&lt;!--[if !mso]&gt;&lt;!--&gt;&lt;meta http-equiv="X-UA-Compatible" content="IE=edge"/&gt;&lt;!--&lt;![endif]--&gt;&lt;meta name="viewport" content="width=device-width,initial-scale=1"/&gt;&lt;meta name="x-apple-disable-message-reformatting"/&gt;&lt;meta name="format-detection" content="telephone=no,address=no,email=no,date=no,url=no"/&gt;&lt;meta name="color-scheme" content="light"/&gt;&lt;meta name="supported-color-schemes" content="light"/&gt;&lt;title&gt;Defeating Nondeterminism in LLM Inference&lt;/title&gt;&lt;!--[if mso]&gt;&lt;xml&gt;&lt;o:OfficeDocumentSettings&gt;&lt;o:AllowPNG/&gt;&lt;o:PixelsPerInch&gt;96&lt;/o:PixelsPerInch&gt;&lt;/o:OfficeDocumentSettings&gt;&lt;/xml&gt;&lt;![endif]--&gt;&lt;style&gt;
  :root { color-scheme: light; supported-color-schemes: light; }
  body { margin: 0; padding: 0; min-width: 100%!important; -ms-text-size-adjust: 100% !important; -webkit-transform: scale(1) !important; -webkit-text-size-adjust: 100% !important; -webkit-font-smoothing: antialiased !important; }
  .body { word-wrap: normal; word-spacing:normal; }
  table.mso { width: 100%; border-collapse: collapse; padding: 0; table-layout: fixed; }
  img { border: 0; outline: none; }
  table {  mso-table-lspace: 0px; mso-table-rspace: 0px; }
  td, a, span {  mso-line-height-rule: exactly; }
  #root [x-apple-data-detectors=true],
  a[x-apple-data-detectors=true],
  #MessageViewBody a { color: inherit !important; text-decoration: inherit !important; font-size: inherit !important; font-family: inherit !important; font-weight: inherit !important; line-height: inherit !important; }
  span.MsoHyperlink { color: inherit !important; mso-style-priority: 99 !important; }
  span.MsoHyperlinkFollowed { color: inherit !important; mso-style-priority: 99 !important; }
  .a { background-color:#dedede; }
  .b { background-color:#2a2a2a; }
  .c  { background-color:#ffffff; }
  .d { background-color:#fff0c8; }
  .d2 { background-color:#FFFFFF; }
  .d3 { background-color:#FFFFFF; }
  h1 a { text-decoration:none;color:#2C81E5;font-style:italic; }
  h2 a { text-decoration:none;color:#2C81E5;font-style:italic; }
  h3 a { text-decoration:none;color:#2C81E5;font-style:italic; }
  h4 a { text-decoration:none;color:#2C81E5;font-style:italic; }
  h5 a { text-decoration:none;color:#2C81E5;font-style:italic; }
  h6 a { text-decoration:none;color:#2C81E5;font-style:italic; }
  h1, h1 a, h2, h2 a, h3, h3 a, h4, h4 a, h5, h5 a, h6, h6 a, ul, li, ol, p, p a { margin: 0;padding: 0; }
  h1 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:700;font-size:28px;color:#2A2A2A;line-height:42px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px }
  h2 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:700;font-size:24px;color:#2A2A2A;line-height:36px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px }
  h3 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:20px;color:#2A2A2A;line-height:30px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px }
  h4 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:18px;color:#2A2A2A;line-height:27px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px }
  h5 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:16px;color:#2A2A2A;line-height:24px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px }
  h6 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:14px;color:#2A2A2A;line-height:21px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px }
  p { font-family:'Georgia','Times New Roman',serif;font-weight:400;color:#2D2D2D;font-size:16px;line-height:24px;padding-bottom:8px;padding-top:8px;mso-margin-top-alt:8px;mso-margin-bottom-alt:8px; }
  p a, .e a, ul a, li a, .h a, .h2 a, .h3 a { word-break:break-word;color:#2C81E5 !important;text-decoration:none;font-style:italic; }
  p a span, .e a span, ul a span, li a span { color: inherit }
  p .bold { font-weight:bold;color:#2D2D2D; }
  p span[style*="font-size"] { line-height: 1.6; }
  .f p { font-size:12px;line-height:15px;color:#2D2D2D;padding:0; }
  .f p a { color:#2D2D2D !important; }
  .g p { font-family:'Helvetica',Arial,sans-serif;font-size:14px;line-height:20px;font-weight:normal;margin:0; }
  .g p a  { text-decoration: underline; }
  .i p { font-family:'Helvetica',Arial,sans-serif;line-height:23px;font-size:15px;color:#2D2D2D; }
  .i p a { color:#2D2D2D !important; }
  .i2 p { font-family:'Helvetica',Arial,sans-serif;line-height:23px;font-size:15px;color:#2D2D2D; }
  .i2 p a { color:#2D2D2D !important; }
  .i3 p { font-family:'Helvetica',Arial,sans-serif;line-height:43px;font-size:24px;color:#2D2D2D; }
  .i3 p a { color:#2D2D2D !important; }
  .h p a { color:#595959 !important; }
  .h2 p a { color:#595959 !important; }
  .h3 p a { color:#595959 !important; }
  .f p a, .i p a, .i2 p a, .i3 p a, .h p a, .h2 p a, .h3 p a { text-decoration:underline; }
  .j { border-top:3px solid #ffeb2d; }
  .k p { padding-left:15px;padding-bottom:0px;padding-top:6px;mso-margin-top-alt:6px;mso-margin-bottom-alt:0px;mso-margin-left-alt:15px; }
  .o { background-color:#FFFFFF;border:1px solid #F1F1F1;border-radius:5px; }
  .o p { font-family:'Helvetica',Arial,sans-serif;padding:0px;margin:0px; }
  .l p,
  .l p a, .l a { font-size:14px;line-height:20px;font-weight: bold;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; }
  .m p,
  .m p a { font-size:13px;line-height:18px;font-weight:400;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; }
  .n p,
  .n p a { font-size:12px;line-height:17px;font-weight:400;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; }
  .p { background-color:#FFFFFF;max-width:520px;border:1px solid #E1E8ED;border:1px solid rgba(80, 80, 80, 0.3);border-radius:5px; }
  .q { font-size:16px;font-family:Helvetica,Roboto,Calibri,sans-serif !important;border:1px solid #e1e8ed;border:1px solid rgba(80, 80, 80, 0.3);border-radius:10px;background-color:#FFFFFF; }
  .q p { font-size:16px;font-family:system-ui,Helvetica,Roboto,Calibri,sans-serif !important;color:#222222;padding:4px 0; }
  .r { border:1px solid #E1E8ED !important;border-radius:5px; }
  .s p { font-size: 14px; line-height: 17px; font-weight: 400; color: #697882; text-decoration: none; }
  .t p { font-family:'Helvetica',Arial,sans-serif;font-size:12px;line-height:18px;font-weight:400;color:#000000;font-style:italic;padding:4px 0px 0px; }
  .v { border-radius:10px;border:solid 0px #DFD150;background-color:#2C81E5;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;color:#FFFFFF; }
  .v a { text-decoration:none;display:block;color:#FFFFFF; }
  .w p { font-size:12px;line-height:15px;font-weight:400;color:#FFFFFF; }
  .w p a { text-decoration: underline !important;color:#FFFFFF !important; }
  ul { font-family:'Helvetica',Arial,sans-serif;margin:0px 0px 0px 25px !important;padding:0px !important;color:#2D2D2D;line-height:24px;list-style:disc;font-size:16px; }
  ul &gt; li { font-family:'Helvetica',Arial,sans-serif;margin:10px 0px 0px 0px !important;padding: 0px 0px 0px 0px !important; color: #2D2D2D; list-style:disc; }
  ol { font-family:'Helvetica',Arial,sans-serif;margin: 0px 0px 0px 25px !important;padding:0px !important;color:#2D2D2D;line-height:24px;list-style:decimal;font-size:16px; }
  ol &gt; li { font-family:'Helvetica',Arial,sans-serif;margin:10px 0px 0px 0px !important;padding: 0px 0px 0px 0px !important; color: #2D2D2D; }
  .e h3,
  .e p,
  .e span { padding-bottom:0px;padding-top:0px;mso-margin-top-alt:0px;mso-margin-bottom-alt:0px; }
  .e span,
  .e li { font-family:'Helvetica',Arial,sans-serif;font-size:16px;color:#2D2D2D;line-height:24px; }
  .rec { font-family:  ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji" !important; }
  .rec__button:hover { background-color: #f9fafb !important; }
  .copyright a {color: inherit !important; text-decoration: none !important; font-size: inherit !important; font-family: inherit !important; font-weight: inherit !important; line-height: inherit !important;}
  .txt_social p { padding: 0; word-break: break-all; }
  .table, .table-c, .table-h { border: 1px solid #C0C0C0; }
  .table-c { padding:5px; background-color:#FFFFFF; }
  .table-c p { color: #2D2D2D; font-family:'Helvetica',Arial,sans-serif !important;overflow-wrap: break-word; }
  .table-h { padding:5px; background-color:#F1F1F1; }
  .table-h p { color: #2A2A2A; font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif !important;overflow-wrap: break-word; }
  @media only screen and (max-width:667px) {
    .aa, .w100pc { width: 100% !important; }
    .bb img { width: 100% !important; height: auto !important; max-width: none !important; }
    .cc { padding: 0px 8px !important; }
    .ee { padding-top:10px !important;padding-bottom:10px !important; }
    .ff ul, .ff ol { margin: 0px 0px 0px 10px !important;padding: 0px !important; }
    .ff li { margin:10px 0px 0px 10px !important; }
    .r {height:140px !important;}
    .s p { font-size:13px !important;line-height:15px !important; }
    .mob-hide {display:none !important;}
    .mob-show {display: block !important; width: auto !important; overflow: visible !important; float: none !important; max-height: inherit !important; line-height: inherit !important;}
    .mob-stack {width:100% !important;display:block !important;}
    .mob-w-full {width:100% !important;}
    .mob-block {display:block !important;}
    .embed-img {padding:0px 0px 12px 0px !important;}
    .socialShare {padding-top:15px !important;}
    .rec { padding-left:15px!important;padding-right:15px!important; }
    .bodyWrapper { padding:7px 4px 7px 4px !important; }
    .social-mobile {float:left !important;margin-top:10px !important;}
  }
  @media screen and (max-width: 480px) {
    u + .a .gg { width: 100% !important; width: 100vw !important; }
    .tok-heart { padding-top:75% !important; }
    .tok-play { padding-top: 250px !important; }
  }
  @media screen and (max-width: 320px) {
    .tok-heart { padding-top:65% !important; }
  }
  .u { border: 1px solid #CACACA !important; border-radius: 2px !important; background-color: #ffffff !important; padding: 0px 13px 0px 13px !important; font-family:ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif !important;font-size: 12px !important; color: #767676 !important; }
  .u a { text-decoration: none; display: block !important; color: #767676 !important; margin: 0px !important; }
  .u span, .u img { color: #767676 !important;margin:0px !important; max-height:32px !important;background-color:#ffffff !important; }
&lt;/style&gt;&lt;!--[if mso]&gt;&lt;style type="text/css"&gt;
    h1, h2, h3, h4, h5, h6 {font-family: Arial, sans-serif !important;}
    body, table, td, p, a, span {font-family: Arial, sans-serif !important;}
    sup { font-size: 100% !important;vertical-align: .5em !important;mso-text-raise: -1.5% !important;line-height: 0 !important; }
    ul { margin-left:0px !important; margin-right:10px !important; margin-top:20px !important; margin-bottom:20px !important; }
    ul li { margin-left: 0px !important; mso-special-format: decimal; }
    ol { margin-left:0px !important; margin-right:10px !important; margin-top:20px !important; margin-bottom:20px !important; }
    ol li { margin-left: 0px !important; mso-special-format: decimal; }
    li.listItem { margin-left:15px !important; margin-top:0px !important; }
    .paddingDesktop { padding: 10px 0 !important; }
    .edm_outlooklist { margin-left: -20px !important; }
    .embedImage { display:none !important; }
&lt;/style&gt;&lt;![endif]--&gt;&lt;!-- __merge_tags_in_links__ --&gt;&lt;style&gt;
        @font-face {
          font-family: 'Open Sans';
          font-style: normal;
          font-weight: 700;
          font-display: swap;
          src: url('https://fonts.gstatic.com/s/opensans/v40/memSYaGs126MiZpBA-UvWbX2vVnXBbObj2OVZyOOSr4dVJWUgsg-1x4gaVIUwaEQbjA.woff2') format('woff2');
        }

        @font-face {
          font-family: 'Open Sans';
          font-style: italic;
          font-weight: 700;
          font-display: swap;
          src: url('https://fonts.googleapis.com/css2?family=Open+Sans:ital,wght@1,700&display=swap') format('woff2');
        }
&lt;/style&gt;&lt;/head&gt;&lt;body class="a" style="margin:0px auto;padding:0px;word-wrap:normal;word-spacing:normal;background-color:#dedede;"&gt;&lt;div role="article" aria-roledescription="email" aria-label="email_name" lang="en" style="font-size:1rem"&gt;&lt;div style="display:none;max-height:0px;overflow:hidden;"&gt; Plus more about Analog in-memory computing attention mechanism for fast and energy-efficient large language models, and the Majority is not always right: RL training for solution aggregation &#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204; &lt;/div&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" align="center" cellpadding="0" class="gg"&gt;&lt;tr&gt;&lt;td align="center" valign="top"&gt;&lt;table role="none" width="670" border="0" cellspacing="0" cellpadding="0" class="aa" style="width:670px;table-layout:fixed;"&gt;&lt;tr&gt;&lt;td class="bodyWrapper" align="center" valign="top" style="padding:7px 7px 7px 7px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="border-width:0px 0px 0px 0px;border-style: solid; border-color: #2a2a2a;border-radius:10px 10px 0px 0px;background-color:#ffffff;" class="c"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr id="header"&gt;&lt;td style="padding:15px 15px 0px 15px;"&gt;&lt;div style="padding-top:0px;padding-right:0px;padding-bottom:20px;padding-left:0px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td class="f" align="right" valign="top"&gt;&lt;p&gt; September 16, 2025 &nbsp; | &nbsp; &lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxSdB5RCIH6yy1Fm1CYma3EzdpTjobYNmb67GGsU-4QAqyxJjr1xXszLFtR5KyIWZDv7bT5GbTEixV2sVYSZORObkpsNzwAKes2PkZO8c1lvvlyO6n1zjG5JspDIl1UE8zUiOeIHo_iG_TaH_RTuucRSwjIPCm3r8mEbhFyj3QAL-j9X7IzO98cy2GlHbrS8YdZJ3E-xkGYHTvUPm3O8rXASSzaIHeCERYQzMo2-EsTz0hlCxvQ_Cg8rjUeWAeoFuucXjVcGl2x8ZuofX38-hZ-WuZB0fDq-Sg6NQ1ZQY1m3qPyyrePktItHaiqpkSWQchMdjAFUK6zRrBjZQaaxO3vxpVGFo5D1h9ZxF_jSgn4NL6Afcbj8srsDSHt9Dm4OG-zsNhu75HlyQcbrkN9TRY5I1inmsOKNUWiW_Ac-bMBZhgZ_D4h920a4WgICFbITPXcfFSLkSOMYjWu9FgPmNw_sdzYyXsk4x4HWH9wPlp6UWCmjx2BJdklDPk3qvI6ignhs1Mt98WmDBetbE8tF7YWf_7i5LqwJ0BYi5f-aEnh-H0IK4G9NKRTItYUQuD9pN2g1DncKuh3lyWAeL50E1vbjNjrWJZcaTtO_-YeSwGel0bsK9wXL0O6Qdf2IazxBj-G7l9MptLyq6tGvEFVZQjUxRTf5kYP0jK9TBJjK-0aU9HN_smbEKo3fapNoRmAJlA_cd5MjBY9VIKglCJ-53RWfTgwjrpCpEmGeljkOuUqVHS4PV3sK5Q2BNVvbFrFQzPzLhUduOoFzdGqQvSMbD3olcNLiRumddYzk2OYox06fP/4jy/vWJH3ApHQoazUFasn9oYhQ/h0/h001.MG7h6ugPX9Vpyl88byH5dImc09dLBRC9HPu3k5aOxqE"&gt;&lt;span class="translation_missing" title="translation missing: en.templates.posts.email.header.read_online"&gt;Read Online&lt;/span&gt;&lt;/a&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="center" valign="top" style="padding:15px 0;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td align="center" valign="top"&gt;&lt;h1 style="text-align:left;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;font-weight:Bold;font-size:32px;color:#2A2A2A;padding:2px 0;line-height:38px;"&gt; Defeating Nondeterminism in LLM Inference &lt;/h1&gt;&lt;p style="text-align:left;font-family:'Helvetica',Arial,sans-serif;font-weight:normal;font-size:20px;color:#3E3E3E;padding:5px 0;line-height:24px;"&gt; Plus more about Analog in-memory computing attention mechanism for fast and energy-efficient large language models, and the Majority is not always right: RL training for solution aggregation &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td style="line-height:0;"&gt;&lt;div data-open-tracking="true"&gt; &lt;img src="https://elink4f7.mail.bycloud.ai/ss/o/u001.3wmUuY8gEWd4_869a_eXcg/4jy/vWJH3ApHQoazUFasn9oYhQ/ho.gif" alt="" width="1" height="1" border="0" style="height:1px !important;width:1px !important;border-width:0 !important;margin-top:0 !important;margin-bottom:0 !important;margin-right:0 !important;margin-left:0 !important;padding-top:0 !important;padding-bottom:0 !important;padding-right:0 !important;padding-left:0 !important;"/&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr id="content-blocks"&gt;&lt;td class="email-card-body" align="center" valign="top" style="padding-bottom:28px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td id="nov-18-th-nov-24-th-33-latest-ai-re" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h6 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:87.5%;"&gt;&lt;i&gt;Sep 8th ~ Sep 16th&lt;/i&gt;&lt;br&gt;&lt;i&gt;#73 Latest AI Research Explained Simply&lt;/i&gt;&lt;/h6&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""&gt;&lt;tr&gt;&lt;td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="industry-news-in-1-line" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"&gt;&lt;h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;"&gt;🗞️ Industry News in 1 Line&lt;/h2&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td style="padding-bottom:12px;padding-left:50px;padding-right:40px;padding-top:12px;" class="ee"&gt;&lt;div style="margin-left:0px;" class="edm_outlooklist"&gt;&lt;ol start="1" style="list-style-type:decimal;margin:0px 0px;padding:0px 0px 0px 0px;"&gt;&lt;li class="listItem ultext"&gt;&lt;p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(255, 58, 58);font-size:0.6rem;"&gt;♥ 4.2k&lt;/span&gt;&lt;/span&gt; &lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.ZsobsZmG6kUZ4LjqczYBVMd9-lmVPP9PQdUWuwV36ycFKyw_M8n_9ZDHM9y8KGmsZEwk1rvwXXLyr9q7RXCxe1LWaOJnPE7czaviYeXjucSy16SZUsMHePkZxJ0JwKNPrXufHpOpLm1bpyADnIRWtBOa2YiBOG7HplsF5fd4eQc58UC1zL1kMEQDbYoGeupP3HD_aMrAmTO7vDUOP9Dk2XFv_N3JUODt4GG0gMlDBAgULU-pdFSFO_d5wu_ns1v1Yj3ijIV6AcfX9Uo3SmqytA/4jy/vWJH3ApHQoazUFasn9oYhQ/h1/h001.BEbC_uG4a7C1UZrnmJsSgZNLLKr9lJrRZrzB7Y9LPBU" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;Alibaba&#39;s Qwen&lt;/span&gt;&lt;/a&gt; team has introduced &lt;i&gt;Qwen3-Next-80B-A3B&lt;/i&gt;, its latest LLM with 80 billion parameters, which &lt;b&gt;only activates 3 billion per token&lt;/b&gt;. This design enables it to train 10 times more cheaply and run inference 10 times faster than the Qwen3-32B model, particularly with context lengths over 32,000 tokens. If you are interested in exploring this new architecture, you can access the model and its technical details through the &lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.wcXdj6dB6nd1Cx4inzJNk2uZGa3BYLsLAcCl27a2qvL79ycUjXI03LdL-VtG6cVDUeoHFHB244bqi4x7JqMl8hqLmeJweh7y2tNc-YO2NJHbqj5L2kntsgutX_hwrvi72ING7UzaPfILslA77De1XT41eeBFpf1Pcd9TQvHq8PgcbWyG8w3ROZLxoVl5R25HDiYMOJRAdILJODJJLSWTi8XHFL8rQFUFAhkSJUsFfZegvSYWQQ6xU71Gudmd2QCHBr2OD9MsoXv7mdWRg5c1Yh0vF6daO2RQUqiXVOtwpQi9zYyPTnu4YP-awO-3bK4X5_0BolDVMNpbh7pJbdzXbQ/4jy/vWJH3ApHQoazUFasn9oYhQ/h2/h001.xSfJoGfoAtpf8_EXcZ6DxTpIZhHzFkWeOkMMdv6qaIw" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;official blog post&lt;/span&gt;&lt;/a&gt; and platforms like &lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.CxDkkVpJsBdVoe83c_tBWu5HHpecwStOph8cXvproQ9lxqiNkKoMv3OieKkHYlf6i0Gng2Q8-C9y-QKRuaBUPVFM5lNhmnzNVcWuc4Yn7d83OgR1nHgOthvsbS2NwZJlLu4gUnsRX4GCLUvAcnShJclXwercsOVaDg2BlmF0C8SF_jJliNdUE4F2hKevpqr5wEjWz3xLmVfpI8K3e_9BSLkZxj_dBjX18P5vthtuYdiPwbhQymukQ5SviWBKhKhY7gPPyuf9xgjvExTAJtr0Vh9aUezwDZ38CyWLORntjB0/4jy/vWJH3ApHQoazUFasn9oYhQ/h3/h001.nrmbC4NLCIrCBKDQIJVFKcfZ3tEbZ8mMcQ89B0pOyAc" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;Hugging Face&lt;/span&gt;&lt;/a&gt; and &lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxbBqUjAzI8c4yNhiTrsgEvOwDU_fCr5vvPKmrkpST4Zepbj0Z0P8sgmrnEmg8aIfb5DdQzR4rxavyNgCMgtFX12Ac4JRxmoNxQ3OiLsBvhDdoAAC7IlDVY57qASp7BBGjeVC8WWtfUK96cuoem5kB30b01_8iO_KP9Obf_dXF62lgu5CdXRu_w2yWvjCn-CMbOYiRJrrorAWaDN3PNJYj8dL3GgVY9-kbTX-OYO6J9RZtnsf2pTFhTZpACLA_slYvEGM5Yy-UkcPTOqDWVgbix5fdsCBTr6BVb12G-6Ij7-d/4jy/vWJH3ApHQoazUFasn9oYhQ/h4/h001.Yww6hXtB0S3_N7yppRNRI_1zhs-laZ1RCOS69xKBa5U" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;ModelScope&lt;/span&gt;&lt;/a&gt;. &lt;/p&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:626px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/a4ff9475-bf91-4175-a3a1-f43213942d20/archtecture.png?t=1758039771" alt="" height="auto" width="626" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:626px;"&gt;&lt;p&gt;Qwen3-Next Hybrid Architecture: Gated DeltaNet + Gated Attention&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/li&gt;&lt;li class="listItem ultext"&gt;&lt;p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(255, 58, 58);font-size:0.6rem;"&gt;♥ 315&lt;/span&gt;&lt;/span&gt; Researchers from MBZUAI and G42 have released &lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j2xkj7d1E4sIWyFTr7tmA5_feWq8Pbv01saVel1hF93NsSnsIG4p3XSxpbLvOVFZ-3RQTO9b_HnALpeuOcTuMFw1NmIWZu4YUye13XJ7RLH4wYC668JqBocGgu3poz5JFmPBQV59JnN5blpx3DmtpWJi5BBrNRhnEqCy269EQvWPgHcYaS5Pgiv6Q60QVH7M5VGSbDUIT4My-TE7t2KzaycbWYcEM47P4bgT03VpIoC_8GuWzQYrBiocPOeqZ8IW-Dcj7G5mHyp6pEqJSiNRmFZM/4jy/vWJH3ApHQoazUFasn9oYhQ/h5/h001.2mut1Uh7dSu_0K6AF0xh5sFMrpcmXiysdwlIxiGi6pU" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;K2 Think&lt;/span&gt;&lt;/a&gt;, a new 32B parameter open-source AI system designed for advanced reasoning. The model delivers frontier-level performance and outperforms systems more than &lt;b&gt;20 times its size&lt;/b&gt; by using long chain-of-thought supervised fine-tuning. &lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j29WPtVAVL6c4HRTW8-rT5R3TbrEBNd7FwglyT_wcLWlOkcMtdkO06WETTLdC1XbsdMG-ShGg1ql99hca-BKpE8lOIllh5_tZH8pOIr3N46bfjBhCi2VETXwLN61_Ajalgyv6qj6BxCgPJdqKM0VKVme8VKoDqNjvqZRLI7bv5ry6DkCDXCb475S8AVlbIAxMv-v_wjhFVHcxxWUXa63Srph1-bLmIkVuNpxWsz4e_80HFKj97gW_drk6SDZxDPnp7Q/4jy/vWJH3ApHQoazUFasn9oYhQ/h6/h001.pDS1StpofIRSeU4U_Ro_JJjJCzr4C0SWJWHiL6cwI0w" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;You can try K2 Think right now&lt;/span&gt;&lt;/a&gt; in your browser. &lt;/p&gt;&lt;/li&gt;&lt;li class="listItem ultext"&gt;&lt;p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(255, 58, 58);font-size:0.6rem;"&gt;♥ 349&lt;/span&gt;&lt;/span&gt; The Interaction Company has launched &lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.zNfxTwpJFmrsCuJJphGRkNHnCwuLZ6MtS7-d0bcqKNPEQdKjdSJSnkvzulvejD79y29YyNmevGGwjyZOoJ7wgDBXiAoTbtjaMLUxYZjjwBZyhhqYCrowbqM9p0TcGv9wUc9gWIsvyTEHYqbSYAyqnD81BEgH2NpEhOsy-wV139hF-0SuX1JMX9MVVt-hG2CRVsfNDsai5WuzzBT3MhHjDvXvX1fAC9FamrC8hoGAetiU2PCJUAdkDXae624t-3nNTtdouPuGAAwss8PcvrHvog/4jy/vWJH3ApHQoazUFasn9oYhQ/h7/h001._fR51idPub12XujioBYcWZkfslEESPOQtuDXlWwVYnw" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;Poke&lt;/span&gt;&lt;/a&gt;, which is gaining positive attention on social media for its text-based interface that lets users accomplish a variety of tasks. Many people like its conversational nature for everything from getting daily jacket recommendations to analyzing YouTube channel statistics. It can even handle follow-up messages and corrections like a natural conversation. &lt;/p&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:626px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/8c960861-b2fb-4b62-a663-911160a915c2/image.png?t=1758047156" alt="" height="auto" width="626" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/li&gt;&lt;li class="listItem ultext"&gt;&lt;p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(255, 58, 58);font-size:0.6rem;"&gt;♥ 1.3k&lt;/span&gt;&lt;/span&gt; &lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j2_AeM8b1QJ1wG3H-cXPMb-tpE1iJsJ_2eny5JweSUegD7gglYgnNwo0p4JAjROicgzje9WMjaUrkWbg0AcI40bJNdreG1gr7KDcYvTO0LeSs1vcQz54aIyqJ3gyIsrnjo13uuf3IEPP1I6X28ScX0ObLUhtAWBwT1mCUViBt6TBKTdXgTGY2b_SNSF9_sOKx0jy3BwH1UXsbr632_jGrR97I9uHpM0RedoXjhn6cIk6EkX5Zsb7m6dEdML6A8hyzqA/4jy/vWJH3ApHQoazUFasn9oYhQ/h8/h001.6uFcc68PezPLnqJTwxiG-5H7wONihomk1kSI7wTBLO4" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;H Company&lt;/span&gt;&lt;/a&gt; has released the &lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.CxDkkVpJsBdVoe83c_tBWh9nwkGbQ8Q0hzh4nLq3_8hobE10IeyNRMK1HDJHekyBgouZUN65tjKWVdkAX26WqHJk_RaoBHF4sDrq7D0ScRkByKTw9m0zxZalk6lI-s-i24uZE1twIYvMZcYxcl9CygWYz5uKUs40KsIUSTfvVqpQM_EKdDdZ8bfb6S43S1HBopfDKlAskCjZYb-HabSjhcIGqCrpJYm8-d0qMRJaoUCcKusDmSTAAI5q6J_U87NFgf7-rcJLb7ZTq3KbTegHGCzEMsCzWZkt5505nhvAM3s/4jy/vWJH3ApHQoazUFasn9oYhQ/h9/h001.ZRhH36VmbIJzEzA0MFJE74Jg6CDaBXBEy7eFHklUGw4" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;Holo1.5 series&lt;/span&gt;&lt;/a&gt;, a new family of open-source models designed to power &quot;Computer Use&quot; agents that can interact with web, desktop, and mobile applications on a user&#39;s behalf. These state-of-the-art models excel at localizing user interface elements, answering questions about on-screen content, and providing powerful productivity tools. &lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.gXpuuKq1N_-6FJq3Q1UVsauifQBpOuxVZv4pndNMnGzkfAB-ARygv8akLCN6tDKaULIyZOKH-fy8n0r2hWqlzEjVnaeFiansUo5EqBj3FVvqiW1KssACUixeFazk8mqvXQg7OiMSXXtEfsC5Lttpl6KdcuSH6eQQFrSvuuCdU8Lmxq0GWbjwWxGahnDGJpiJSpUg7ieGwUBMFMD7-8LjBHsdLaU2Y_uyOWkpWjNtLZGjS4G7mVZ8fCdhLtHWDEjzg41Xt8acAKzXuDtF9bpxJCc1qDfSiSfApBAYu3uxuiV0vmMggeBETIRmA7N9h4p5UZTa_wWKI3o1grpIFodhOA/4jy/vWJH3ApHQoazUFasn9oYhQ/h10/h001.h99POGVK8tKb79Gff9osiF5UvxCsHNhkEy-qaxz8r6g" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;View a replay of the Holo1.5 session&lt;/span&gt;&lt;/a&gt; in your browser to see what it can do for you. &lt;/p&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:626px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/2c0795ca-d8c4-4a5a-a07d-2fef1b47597c/image.png?t=1758047444" alt="" height="auto" width="626" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/li&gt;&lt;li class="listItem ultext"&gt;&lt;p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(255, 58, 58);font-size:0.6rem;"&gt;♥ 413&lt;/span&gt;&lt;/span&gt; &lt;span style=""&gt;Google has released &lt;/span&gt;&lt;span style=""&gt;&lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.gXpuuKq1N_-6FJq3Q1UVsYoH4jrmyKS_63j1cggnNZVWuqyWexKOBZN68BSlB0U0LsJoTZmKIMG84isQBO8YKo5NOW68XpTEAMGviGRYiMwfP3Yp-bd1lKCAnO9o2pcmP-Hl2Of7ataf6VFtYPHONl4sL_A6ZjItwc1xmipTHuBhJT9rbAaGi3RqH0FgU1sHwy959h5OsTulvAgu0NA47czB5gzEzrTmfKz4nEAAITpuMJ9eEUgwH7XURRCm8eRvL92ZCco4jAWUbtSlXfmyGuOGfXsC8MI7s7slk2mQbaZgf5RavTUtGWjaC1Hv3IsYqpzcGiSS0G8kflPp7xO_Gez-rekxow6BuHERXWk-hio/4jy/vWJH3ApHQoazUFasn9oYhQ/h11/h001.MO4XU5ZDjGuo4AYT1sCMjUdL3FFOvme9aC7ZfvNnc-g" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;VaultGemma&lt;/span&gt;&lt;/a&gt;&lt;/span&gt;&lt;span style=""&gt;, which is a new LLM trained from scratch with &lt;/span&gt;&lt;span style=""&gt;&lt;b&gt;differential privacy&lt;/b&gt;&lt;/span&gt;&lt;span style=""&gt; to protect sensitive information in the training data. This new model is available in a &lt;/span&gt;&lt;span style=""&gt;&lt;b&gt;1B-parameter open version,&lt;/b&gt;&lt;/span&gt;&lt;span style=""&gt; and it allows researchers and developers to build privacy-preserving AI applications. You can &lt;/span&gt;&lt;span style=""&gt;&lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.CxDkkVpJsBdVoe83c_tBWrG74ntPIjKAlg5mrGAobI-Uxcn3758McHckwIr2DLWx2AWRBEugL9uPcPeyMSNDKuPvPmj9a-fVK9qDMy197smxWieHIxCbzL3Mn6B73pDb4qEouSCiRYlYJmgAQmHMR9q1f5jr4tBLUFpugJHN3mFtBCvHhWdyDLK26G8b0rgfKdEeyaH3lhvPmle-yt2MsG8vC_zo0MzTfCTUOc2VHhIP9S_L2FMQsOC36Rzrx4PC26n_JsieOAVnxxZXeW5yyQ/4jy/vWJH3ApHQoazUFasn9oYhQ/h12/h001.JBHJpCYxCx5bTxkea-ZLnaY9di2fx5Pafr7Fzle-ZzI" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;download VaultGemma from Hugging Face&lt;/span&gt;&lt;/a&gt;&lt;/span&gt;&lt;span style=""&gt; today.&lt;/span&gt;&lt;/p&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:626px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/2490ac73-af7b-412a-bc44-6d7f87b11705/VaultGemma1_ScalingLaws.width-1250.png?t=1758047723" alt="" height="auto" width="626" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:626px;"&gt;&lt;p&gt;The structure of DP scaling laws. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/li&gt;&lt;/ol&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""&gt;&lt;tr&gt;&lt;td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""&gt;&lt;tr&gt;&lt;td bgcolor="transparent" style="background-color:transparent;border-color:#2C81E5;border-style:solid;border-width:5px;padding:0.0px 0.0px 0.0px 0.0px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"&gt;&lt;tr&gt;&lt;td class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"&gt;&lt;h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;"&gt;&lt;span style=""&gt;Support My Newsletter&lt;/span&gt;&lt;/h2&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style="color:rgb(34, 34, 34);font-family:Georgia, "Times New Roman", serif;font-size:16px;"&gt;As I aim to keep this newsletter free forever, your support means a lot. If you like reading The AI Timeline, consider forwarding it to another research enthusiast, It helps us keep this up for free!&lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="font-size:0px;line-height:0px;padding:30px 0px 30px;" class="dd"&gt;&lt;table class="j" role="none" width="50%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td&gt; &nbsp; &lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"&gt;&lt;h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;"&gt;Share The AI Timeline&lt;/h2&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; You currently have &lt;strong&gt;0&lt;/strong&gt; referrals. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="left" valign="top" style="padding-bottom:20px;padding-left:15px;padding-right:15px;padding-top:20px; display:none;width:0px;max-height:0px;overflow:hidden;mso-hide:all;height:0;font-size:0;max-height:0;line-height:0;margin:0 auto;" class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 0;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:313px;"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxWc4htTObwdorovK0nFHVH-4pUdVE0ELYH5DsNemk732SjNwhPNJ25r0O8B5vYifsGNUqyW5TiZkyMsF1yreu0byy2KW36J1wDdpoLuXg2TU1F1OW8OHoHaU4-ZmrZpPU4RN-crQCEimD190CSn9fPuxpIRojBJyu1VfV5KtQD3QMVdSg2JrjEj5-xm4r4E12Whf08itqPCb9Q5W0X4rt3ubYkqCmWnLeZpmb3_RZcbIk0UE5wZnFLCQJHLFs0qZ0OGpXp89o1HU4mWIBur5Or4tQGm5M_Y8m5PvTEfYfxLRyrcRv7GyVs5oLtFfiySZ2SqtZypLA-h50h61p0uPiA7iA_PiMqlVLtM-87XL33VZi05_O3UTpWE_0nAzFRJ4TW1ayz3_vn4Zlp9IERdbnnAd_1kPLD4lAQcR5PRXgtpCf_V7T5A1YeE-oSsx2bn3oZCBM1LnuIxgrqgY5mYi2qQ2VyTSjk3SgK0z-A29AcFs8nj6wiymJAsqsU86YZksDOVCIegJ2zdFMPelVvAHQsDPNhDoz8ddcWWFxEa8rcaxn6Kgzp9j00-hijZJTs5ZrXdo4M5HZ8LqhSYi7rmO90X8-JVwy4nw3SXKVTUVhITk/4jy/vWJH3ApHQoazUFasn9oYhQ/h13/h001.JPaat_ytJ3z0YUwusvVht0P6ybejNt4x4F6ppTRsR-0" rel="noopener noreferrer nofollow" style="text-decoration:none;" target="_blank"&gt;&lt;img src="" alt="" height="auto" width="313" style="display:block;width:100%;" border="0"/&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr class="btn_row"&gt;&lt;td valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:left;width:100%;word-break:break-word;" class="dd"&gt;&lt;table width="100%" role="none" border="0" cellspacing="0" cellpadding="0" style="margin:14px auto 14px auto;"&gt;&lt;tr&gt;&lt;td align="left" valign="middle"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0"&gt;&lt;tr&gt;&lt;td style="background-color:#2C81E5;border-radius:8px;mso-padding-alt:14px 20px;" class="btn"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxWc4htTObwdorovK0nFHVH-4pUdVE0ELYH5DsNemk732SjNwhPNJ25r0O8B5vYifsGNUqyW5TiZkyMsF1yreu0byy2KW36J1wDdpoLuXg2TU1F1OW8OHoHaU4-ZmrZpPU4RN-crQCEimD190CSn9fPuxpIRojBJyu1VfV5KtQD3QMVdSg2JrjEj5-xm4r4E12Whf08itqPCb9Q5W0X4rt3ubYkqCmWnLeZpmb3_RZcbIk0UE5wZnFLCQJHLFs0qZ0OGpXp89o1HU4mWIBur5Or4tQGm5M_Y8m5PvTEfYfxLRyrcRv7GyVs5oLtFfiySZ2SqtZypLA-h50h61p0uPiA7iA_PiMqlVLtM-87XL33VZi05_O3UTpWE_0nAzFRJ4TW1ayz3_vn4Zlp9IERdbnnAd_1kPLD4lAQcR5PRXgtpCf_V7T5A1YeE-oSsx2bn3oZCBM1LnuIxgrqgY5mYi2qQ2VyTSjk3SgK0z-A29AcFs8nj6wiymJAsqsU86YZksDOVCIegJ2zdFMPelVvAHQsDPNhDoz8ddcWWFxEa8rcaxn6Kgzp9j00-hijZJTs5ZrXdo4M5HZ8LqhSYi7rmO90X8-JVwy4nw3SXKVTUVhITk/4jy/vWJH3ApHQoazUFasn9oYhQ/h14/h001.EV09yjsp9RAcT2DVV9oVGM_fWvoPzVPM2aRcRxETLeM" target="_blank" rel="noopener noreferrer nofollow" style="background-color:#2C81E5;border-radius:8px;color:#FFFFFF;display:inline-block;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;font-size:16px;font-weight:normal;line-height:18px;padding:14px 20px;text-decoration:none;"&gt; Click to Share &lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; Or copy and paste this link to others: &lt;a class="link" href="https://mail.bycloud.ai/subscribe?ref=6SqUHb8KiF&_bhlid=bf7a73b936aab597b0df9777ef50b28c5a049d32" target="_blank" rel="noopener noreferrer nofollow" clicktracking="off"&gt;&lt;span&gt;https://mail.bycloud.ai/subscribe?ref=6SqUHb8KiF&lt;/span&gt;&lt;/a&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="font-size:0px;line-height:0px;padding:30px 0px 30px;" class="dd"&gt;&lt;table class="j" role="none" width="50%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td&gt; &nbsp; &lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr class="btn_row"&gt;&lt;td valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"&gt;&lt;table width="100%" role="none" border="0" cellspacing="0" cellpadding="0" style="margin:14px auto 14px auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="middle"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0"&gt;&lt;tr&gt;&lt;td style="background-color:#2C81E5;border-radius:8px;mso-padding-alt:14px 20px;" class="btn"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.zNfxTwpJFmrsCuJJphGRkKSrCVph9-fOYkcjx4VfJRyUw-Iv7GHKoTyxc57iFdcabeJrUAXVgdJXAkTcc7bS82ZF6NEkQHkUBgqGaM66RDbyMBpTK8pOBl6aVCc1cb8u9ZaBbf2Yaf3XKd396iKIKdLOfCoNPeo3qwSP9NnbQQkOnSXZ6mwClVurb89DmZI4P-rd4oLoDLml8i_GbAofvu7obqT3MHMO6xs27pqA8_dP_HARse6a8Xf7quLMQLcg/4jy/vWJH3ApHQoazUFasn9oYhQ/h15/h001.Jnq1CpV1DbK0f56602ynDFL6FhAadq0c7AYT5SB0ynI" target="_blank" rel="noopener noreferrer nofollow" style="background-color:#2C81E5;border-radius:8px;color:#FFFFFF;display:inline-block;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;font-size:16px;font-weight:normal;line-height:18px;padding:14px 20px;text-decoration:none;"&gt; Check Out My Patreon &lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style=""&gt;&lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.tLfGW26lAwaS9gFg17HSoGymQ3NNPtd5dE5MV_8UgjLbPKYFbBPtV6oAT4VYSncNiXOMe0ETHKViEemkGKRuti97gDsqlNJXOC9cMEoZt4vqGEMzd3CYIoAvubE-GTMMdUISuJ7id-aPVhWy5ENXsE0a01U-3mkTCzNO02--Ug9x5tzzeVEw2xRSIgiLsyD4IGVjCBnU0xT3JidcbA4JzBw2IxD39UfmZWyFZhvMg0X8xNcx8Dc5rcKRfA-J_HdR/4jy/vWJH3ApHQoazUFasn9oYhQ/h16/h001.wUaIhlTTLDwby-x1nlJd5V6gfe4xEaXZ4aEXYPXNTqc" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;Advertise with The AI Timeline! &lt;/span&gt;&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""&gt;&lt;tr&gt;&lt;td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="defeating-nondeterminism-in-llm-inf" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"&gt;&lt;h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;"&gt;Defeating Nondeterminism in LLM Inference&lt;/h2&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style=""&gt;&lt;i&gt;He et al. [&lt;/i&gt;&lt;/span&gt;&lt;i&gt;Thinking Machines&lt;/i&gt;&lt;span style=""&gt;&lt;i&gt;]&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(255, 58, 58);font-size:0.6rem;"&gt; ♥ 7.3K &lt;/span&gt;&lt;/span&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; &lt;/span&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt;Nondeterminism&lt;/span&gt;&lt;/span&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="introduction-to-defeating-nondeterm" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;Introduction to Defeating Nondeterminism in LLM Inference&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; Have you ever noticed that asking the same question to a language model multiple times gives different answers, even with settings that should make it deterministic? This inconsistency is a real obstacle for researchers and developers who rely on reproducible results. The common belief is that this nondeterminism stems from floating-point arithmetic and concurrency in GPUs. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:15px;padding-right:15px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:626px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/ed4023f7-8e8e-4bcc-9734-132e454968fe/image.png?t=1758044982" alt="" height="auto" width="626" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; This paper argues that batch sizes change the order of operations inside key computational kernels, leading to unpredictable outputs. Their research tackles this by making these kernels &quot;batch-invariant,&quot; ensuring consistent results no matter how many requests are processed together. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:15px;padding-right:15px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:626px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/83f9e257-448c-480a-b9c0-b041532f02e4/image.png?t=1758044921" alt="" height="auto" width="626" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:626px; padding: 4px 0px 4px 0px;"&gt;&lt;p&gt;From the perspective of an individual user, the other concurrent users are not an &quot;input&quot; to the system but rather a nondeterministic property of the system.&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="inner-workings-of-batch-invariant-k" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;Inner Workings of Batch-Invariant Kernels&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; The team focused on three core operations in transformer models: RMSNorm, matrix multiplication, and attention. Each of these involves reductions (summing values across dimensions), which are sensitive to batch size changes. Normally, kernels optimize performance by adjusting their reduction strategy based on batch size, but this variability breaks consistency. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; For RMSNorm, the solution is straightforward: use a data-parallel approach where each batch element is processed independently within a single core, avoiding inter-core communication that introduces order changes. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:15px;padding-right:15px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:626px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/f321dd0e-b163-4351-bcca-f9969992741c/image.png?t=1758045083" alt="" height="auto" width="626" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:626px; padding: 4px 0px 4px 0px;"&gt;&lt;p&gt;Data Parallel RMSNorm&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; Matrix multiplication poses a bigger challenge due to the use of tensor cores and tile-based processing for efficiency. Here, the researchers enforce a fixed kernel configuration across all batch sizes, sacrificing some performance but ensuring that reduction orders remain unchanged. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; Attention mechanisms add another layer of complexity, as they handle sequences that can be split or cached differently during inference. By standardizing how key-value caches are updated and using fixed split sizes for reductions, they maintain identical numerics regardless of how tokens are processed, making attention batch-invariant as well. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:15px;padding-right:15px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:626px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/7b958b2f-f104-4d52-a08e-cf4d0dfe7445/image.png?t=1758045116" alt="" height="auto" width="626" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:626px; padding: 4px 0px 4px 0px;"&gt;&lt;p&gt;Data Parallel Matmul&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="evaluation-and-implications-for-ai-" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;Evaluation and Implications for AI Research&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; When tested with the Qwen model, standard inference produced 80 different completions for the same prompt, while the batch-invariant approach produced identical outputs every time. Performance benchmarks show a slowdown (from 26 seconds to 42-55 seconds for processing 1000 sequences) but this is a manageable trade-off for determinism. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:15px;padding-right:15px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:626px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/27f1c750-227e-4840-8b65-97f9cab508bb/image.png?t=1758045171" alt="" height="auto" width="626" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; More importantly, this work enables true on-policy reinforcement learning, where sampling and training align perfectly with zero KL divergence. This breakthrough not only makes LLM inference reproducible but also opens doors for more reliable AI systems in research and production. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr class="btn_row"&gt;&lt;td valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"&gt;&lt;table width="100%" role="none" border="0" cellspacing="0" cellpadding="0" style="margin:14px auto 14px auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="middle"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0"&gt;&lt;tr&gt;&lt;td style="background-color:#2C81E5;border-radius:8px;mso-padding-alt:14px 20px;" class="btn"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.tLfGW26lAwaS9gFg17HSoNbIkHNMOZ2sB_IVhZt1lI0orvCbd54YKcDt3dHgg1HTg-8U6viQm0RB40_7MavuLT0hfeU7BIWrx4bTv_1P-x6AdVG2rRVRS2Wi-bc1OE8tSGMAiA65cIvIUAco6QTJboC11hxHQbVug6qyEv0zsh2peNjHbLz5i4lr3BUAkMYNLgSF3ByCE87TBi2R7GYn5tP10LARX-odlfrtP_FOZPAikMVDG1zw-UIeJSwyk0IytnZNEAoJvW1AUOizqJ9oP_sNa7ZBBSkPDyLiRmXBytR8b_Q4AqH8WeJCESdyOxWbfpxwZJ4h2Q6ARxjXOa0xeQ/4jy/vWJH3ApHQoazUFasn9oYhQ/h17/h001.lGibQltypOT6pWQIQJHmYkZbWTltgGpE1MmbS7gJZOU" target="_blank" rel="noopener noreferrer nofollow" style="background-color:#2C81E5;border-radius:8px;color:#FFFFFF;display:inline-block;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;font-size:16px;font-weight:normal;line-height:18px;padding:14px 20px;text-decoration:none;"&gt; Read Full Paper &lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""&gt;&lt;tr&gt;&lt;td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="analog-inmemory-computing-attention" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"&gt;&lt;h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;"&gt;Analog in-memory computing attention mechanism for fast and energy-efficient large language models&lt;/h2&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;i&gt;Leroux&lt;/i&gt;&lt;span style=""&gt;&lt;i&gt; et al. [&lt;/i&gt;&lt;/span&gt;&lt;i&gt;Forschungszentrum Jülich, RWTH Aachen&lt;/i&gt;&lt;span style=""&gt;&lt;i&gt;]&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(255, 58, 58);font-size:0.6rem;"&gt; ♥ 2.2k &lt;/span&gt;&lt;/span&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; &lt;/span&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; LLM Memory Computing &lt;/span&gt;&lt;/span&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; &lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="introduction-to-aggregation-in-larg" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;Introduction to Aggregation in Large Language Models&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; Scaling up test-time compute by generating multiple solutions and selecting among them has become a common strategy for improving large language models on difficult reasoning tasks. However, standard aggregation methods like majority voting or reward model ranking often don’t perform well, especially when correct answers appear only in the minority. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; The paper introduces &lt;b&gt;AggLM&lt;/b&gt;, a method that uses reinforcement learning from verifiable rewards to train a model to review, reconcile, and synthesize answers from multiple candidate solutions. By carefully balancing easy and hard examples during training, AggLM learns to recover correct minority answers while still effectively handling straightforward cases. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:15px;padding-right:15px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:626px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/54b832db-f995-443d-bcfb-568f716440aa/43588_2025_854_Fig1_HTML.png?t=1758045390" alt="" height="auto" width="626" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:626px; padding: 4px 0px 4px 0px;"&gt;&lt;p&gt;Building blocks of the analog hardware attention mechanism.&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="inner-workings-of-agg-lm" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;Inner Workings of AggLM&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; AggLM works by first sampling multiple independent solutions from a base language model for a given problem. These candidate solutions are then passed to an aggregator model, which is trained to produce a final answer by analyzing and combining the inputs. The aggregator reasons over the solutions, corrects mistakes, and fills in gaps where needed instead of just picking the most frequent answer. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:15px;padding-right:15px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:626px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/f2dc095f-985f-48ef-b41e-a52cfdbc1f53/43588_2025_854_Fig2_HTML.png?t=1758045428" alt="" height="auto" width="626" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:626px; padding: 4px 0px 4px 0px;"&gt;&lt;p&gt;Analog hardware attention pipeline.&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; Training happens through reinforcement learning with verifiable rewards. For each problem, the model receives a reward of 1 if its aggregated answer matches the ground truth and zero otherwise. The model receives easy examples, which are those where the majority answer is correct, as well as hard ones, where the majority is wrong. This balance helps the model learn both to trust correct majorities and to recover correct minority answers. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:15px;padding-right:15px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:626px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/0401993c-83d6-489b-be43-5ccb794161d7/43588_2025_854_Fig3_HTML.png?t=1758045487" alt="" height="auto" width="626" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:626px; padding: 4px 0px 4px 0px;"&gt;&lt;p&gt;Multi-tile design and layout for multi-head attention.&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; The aggregator can be the same model as the solution generator or a separate one. In practice, the paper shows that both setups work well. The training uses Group-Relative Policy Optimization (GRPO), a reinforcement learning method that helps the model improve its aggregation policy over time based on group-level rewards. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:15px;padding-right:15px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:626px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/91c70a28-7796-4703-9b6c-488233391797/43588_2025_854_Fig4_HTML.png?t=1758045529" alt="" height="auto" width="626" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:626px; padding: 4px 0px 4px 0px;"&gt;&lt;p&gt;Hardware model adaptation and training.&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="evaluation-and-performance-of-agg-l" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;Evaluation and Performance of AggLM&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; AggLM was tested on several challenging math competition datasets, including AIME and HMMT problems. When aggregating solutions from a 1.7B parameter model, AggLM &lt;b&gt;outperformed majority&lt;/b&gt; voting and reward-based selection methods. It improved accuracy from 35% to 50% on one dataset. It also generalized well to solutions from stronger models, like an 8B parameter model, even though it was only trained on data from the smaller model. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:15px;padding-right:15px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:626px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/929a09ab-693c-4799-a763-42e6ce238e51/43588_2025_854_Fig5_HTML.png?t=1758045588" alt="" height="auto" width="626" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:626px; padding: 4px 0px 4px 0px;"&gt;&lt;p&gt;Analog hardware attention mechanism accuracy and performance.&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; The method proved &lt;b&gt;more token-efficient&lt;/b&gt; than generating a larger number of solutions for majority voting. It performed especially well when candidate solutions were diverse and the correct answer was not in the majority. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr class="btn_row"&gt;&lt;td valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"&gt;&lt;table width="100%" role="none" border="0" cellspacing="0" cellpadding="0" style="margin:14px auto 14px auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="middle"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0"&gt;&lt;tr&gt;&lt;td style="background-color:#2C81E5;border-radius:8px;mso-padding-alt:14px 20px;" class="btn"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j23lDtJNGVB8PI0GW7NAnXAMpckatrDYeNZhcO4Y3EhxU4EHQAyPW-zSdgBIAhmJsA2ulpvpMp26nW-zpY1plOai6ArwRauJFL0NyCe3iuH962F9fhonpKOhy-p7oY2joVnSXbklOdfmov9f-FIBEBQimvFIhLca3867R6nW9zMZ8VxCztTuLcRj5oLOOo2ATLFMRMQBBSJVK7yqSWpwfRUfN08_0ihW9u_aoOnMx8Xl3-8Or2jtk9rmnqyG1xtBS331qtxN5x-vMvSx4BhczoFY/4jy/vWJH3ApHQoazUFasn9oYhQ/h18/h001.IiF8xBt6oI8oofXFabIEUexcpgi9M8OShT9fsjVleS8" target="_blank" rel="noopener noreferrer nofollow" style="background-color:#2C81E5;border-radius:8px;color:#FFFFFF;display:inline-block;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;font-size:16px;font-weight:normal;line-height:18px;padding:14px 20px;text-decoration:none;"&gt; Read Full Paper &lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""&gt;&lt;tr&gt;&lt;td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="the-majority-is-not-always-right-rl" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"&gt;&lt;h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;"&gt;The Majority is not always right: RL training for solution aggregation&lt;/h2&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style=""&gt;&lt;i&gt;Zhao et al. [&lt;/i&gt;&lt;/span&gt;&lt;i&gt;FAIR at Meta, CMU&lt;/i&gt;&lt;span style=""&gt;&lt;i&gt;]&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(255, 58, 58);font-size:0.6rem;"&gt; ♥ 714 &lt;/span&gt;&lt;/span&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; &lt;/span&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; LLM RL &lt;/span&gt;&lt;/span&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; &lt;/span&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; bycloud’s pick &lt;/span&gt;&lt;/span&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; &lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="introduction-to-in-memory-computing" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;Introduction to In-Memory Computing for Transformer Efficiency&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; Transformer models have become the backbone of modern AI, but they come with a significant drawback: high energy consumption and latency, especially during inference. This is largely due to the need to repeatedly load key-value (KV) cache projections from GPU memory into static RAM at each generation step. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; This paper introduces a &lt;b&gt;hardware solution&lt;/b&gt; using in-memory computing with gain cells to store token projections and compute attention operations directly in analog. This approach avoids the costly data transfers that slow down traditional GPUs and opens the door to much faster and more energy-efficient generative transformers. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="inner-working-of-the-gain-cell-atte" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;Inner Working of the Gain-Cell Attention Architecture&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; This paper suggests using gain-cell arrays to store keys and values while performing the dot products needed for self-attention in the analog domain. Gain cells act as both memory and multipliers: they store multi-level voltages representing token projections and generate output currents proportional to the product of stored values and input pulses. This allows the attention mechanism to compute without repeatedly moving data between memory and processing units. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:15px;padding-right:15px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:626px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/5757b560-e2c0-4b4f-8e35-97444854e3fb/image.png?t=1758046122" alt="" height="auto" width="626" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:626px; padding: 4px 0px 4px 0px;"&gt;&lt;p&gt;Given a task and sampled LLM solutions as input, AggLM uses reasoning to review, reconcile, and synthesize a final aggregated solution, which is typically superior to the original solutions.&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; To handle the non-idealities of analog computation (like nonlinearities and value decay over time) the authors designed charge-to-pulse circuits that convert integrated currents into pulse-width modulated signals. These pulses are used for intermediate computation and activation, replacing power-hungry analog-to-digital converters. The architecture also uses sliding window attention to limit the number of tokens attended to at each step, making the hardware design scalable and practical. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; The adaptation algorithm &lt;b&gt;allows pre-trained models like GPT-2 to work on this non-ideal hardware&lt;/b&gt; without full retraining. By fine-tuning scaling parameters layer by layer, the system matches the statistical behavior of ideal digital models, ensuring that performance remains high even with analog imperfections and quantized operations. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="evaluation-and-performance-of-the-h" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;Evaluation and Performance of the Hardware Design&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; The proposed architecture shows remarkable efficiency improvements. Compared to GPUs, it &lt;b&gt;reduces attention latency by up to two orders of magnitude&lt;/b&gt; and energy consumption by up to four orders. For example, it &lt;b&gt;achieves energy savings of 40,000x over&lt;/b&gt; an embedded GPU and &lt;b&gt;70,000x over a data-center GPU&lt;/b&gt; when performing attention computations. These gains come from performing dot products fully in analog, minimizing data movement, and using efficient pulse-based signaling. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; In terms of accuracy, the adapted model performs comparably to GPT-2 on standard language tasks like LAMBADA, HellaSwag, and WikiText-2, even with hardware constraints like HardSigmoid activation instead of softmax and low-precision quantization. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:15px;padding-right:15px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:626px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/c5929d7d-4ace-4f43-b4c2-69c69c0e3007/image.png?t=1758046193" alt="" height="auto" width="626" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:626px; padding: 4px 0px 4px 0px;"&gt;&lt;p&gt;Comparison of training the solution model versus training the aggregator model on the same data, in either separate or multitask settings.&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr class="btn_row"&gt;&lt;td valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"&gt;&lt;table width="100%" role="none" border="0" cellspacing="0" cellpadding="0" style="margin:14px auto 14px auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="middle"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0"&gt;&lt;tr&gt;&lt;td style="background-color:#2C81E5;border-radius:8px;mso-padding-alt:14px 20px;" class="btn"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.fUNb4GdFo9D3F8WuLArtoV5sElgytBlvJRzI9WtI92bVEiH0bTFjhJ1avtH_F0dYoaymVjp9az8sQeYTSWQhoJ23Lu2tj5dqAPj1TbVBLzJzRxbYED05mx7ybIzzN9WS7GNAjW8mTPWxOdLhdUfG8dQMOkPf4z5_9a5GFkItKag9eU6k-LrU5l__Mv9LE5kjCAGHCiMhOvZBZPtyZbmR6UBuhtWt28SSSSJ8lHEGAIV9V1jVEaWLIa2_1CLX-UwD/4jy/vWJH3ApHQoazUFasn9oYhQ/h19/h001.xJvcd3hTnZy1SpgZRLlJb7um7SLQ_tZeGq-GRxtlTn0" target="_blank" rel="noopener noreferrer nofollow" style="background-color:#2C81E5;border-radius:8px;color:#FFFFFF;display:inline-block;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;font-size:16px;font-weight:normal;line-height:18px;padding:14px 20px;text-decoration:none;"&gt; Read Full Paper &lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="center" valign="top" style="padding:20px;"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.amatuKKICSickUKplYJXmCcGxcre9qGrlwGwf1j9N2WMbzP-h2MC0LjOXbi8mslTS45WBFyoGwRzW79zOoS1bkSxmeU6O7AaImVVprExPEjJM3l5eG6ziYRfBopPqp48h9fOMLnmxGhj3KPNTa-5uz4DC3KoiBopQtgOB_CZwNewA4IfmR2UcnZLK-SA3iWERf8obbFGlXRZT8qust2x0Q2f0zF8_ynESz1Odv_TH3xV3SmYhZpPBNT_UKgsjkTs4gSNzOUQjIjOghahAm-qfA/4jy/vWJH3ApHQoazUFasn9oYhQ/h20/h001.01PrdQ9-XfDUg83eW98xn9TmPPnwMENS9lhILlERBzk" style="text-decoration:none;"&gt;&lt;table align="center" width="100%" cellpadding="0" cellspacing="0" border="0" role="none" style="max-width:520px;margin:0 auto;"&gt;&lt;tr&gt;&lt;td class="p" width="100%" style="padding:2px;border:none;"&gt;&lt;table width="100%" cellpadding="0" cellspacing="0" border="0" role="none"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:100%;"&gt;&lt;div style="max-height:0;position:relative;opacity:0.999;width:100%;mso-hide:all;"&gt;&lt;div style="display:inline-block;width:100%;padding-top:25%;"&gt;&lt;img width="20%" height="auto" loading="lazy" alt="" style="border:0;" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/youtube_play_icon.png"/&gt;&lt;/div&gt;&lt;/div&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.amatuKKICSickUKplYJXmCcGxcre9qGrlwGwf1j9N2WMbzP-h2MC0LjOXbi8mslTS45WBFyoGwRzW79zOoS1bkSxmeU6O7AaImVVprExPEjJM3l5eG6ziYRfBopPqp48h9fOMLnmxGhj3KPNTa-5uz4DC3KoiBopQtgOB_CZwNewA4IfmR2UcnZLK-SA3iWERf8obbFGlXRZT8qust2x0cvl2DqM72yhD7CAA8OIgBih92_prWhsLKsDnSnf444R3ie6tuBTz8M80jO63B5I-Q/4jy/vWJH3ApHQoazUFasn9oYhQ/h21/h001.1a1KFdlOo1B8onLO1qkcM81VWnxxygkiZ0e7Uco1Y-8" style="text-decoration:none;"&gt;&lt;img src="https://i.ytimg.com/vi/z3awgfU4yno/maxresdefault.jpg" width="480" height="auto" loading="lazy" alt="YouTube video by bycloud" style="display:block;height:auto;border:0;outline:none;text-decoration:none;background-color:#000000;width:100%;"/&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;&lt;p style="font-size:12px;font-weight:500;font-style:italic;font-family:Helvetica, Calibri, sans-serif;color: #686a6d; padding-top:0 !important;padding-bottom:6px !important; padding-left:4px !important;"&gt; The LLM&#39;s RL Revelation We Didn&#39;t See Coming &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td&gt;&lt;tr&gt;&lt;td class="b" align="center" valign="top" bgcolor="#2a2a2a" style="padding:0px 0px 0px 0px;border-style:solid;border-width: 0px 0px 0px 0px;border-color: #2a2a2a;border-bottom-left-radius:10px;border-bottom-right-radius:10px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td align="center" valign="top" bgcolor="#73ddff" style="padding:12px"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td&gt;&lt;span style="padding-left:1px;"&gt;&lt;/span&gt;&lt;/td&gt;&lt;td align="center" valign="middle" width="75" style="width:75px;"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.1muhFWIqieRYpaJ-FbWSCQqcWoV4NNHHr5SkP9THApWUO4S9eWSDBFDMKQ83N4CY1l4kXQTU9YnEEqXRrg_2uhS94rQOKDl60C6UO57Zu1mJCFi_zhfD-a_hnJHdTQ7Eaj0LIihbe87FlpTrGvHh2r3SEYs-wQpeFGkAXJk_vHiPJo-CxPhtvEQvC7vwVFs2M9jP4XVrusTBj1Fki0D0AIhp_DYCryQI7g7HmUnKZamgLrtzX2vUhz9ZYj19vNgB/4jy/vWJH3ApHQoazUFasn9oYhQ/h22/h001.83jSzs4nz2Qb0TIWik2wptoXgzDOL9zjIw6Ry2FL_CE" style="text-decoration:none;"&gt;&lt;img width="22" height="22" alt="tw" border="0" style="display:block;max-width:22px;color:Dark" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/x_dark.png"/&gt;&lt;/a&gt;&lt;/td&gt;&lt;td align="center" valign="middle" width="75" style="width:75px;"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.amatuKKICSickUKplYJXmBoQnQ9VXnB2zTxBG4HeHBi5iti4l06m5fR1UTFq_vFgQaGMmutCjJbuBFU8WHbRj6heToGsiZHlry3dxu5DEimeQbpBAMyhKdSbaWrmIf3bb3zbGU0a1xfsZtNQooayF0dKoJgq8JKzj6U_zQFbZgkFk_BikXTGPX9M8Qnu-vBbWXai3UsNCtD2CZkbG1pkSvRRFeV-VCGHEDnKeMwTm60UKAPXMLKifMhHMWWHLbEf/4jy/vWJH3ApHQoazUFasn9oYhQ/h23/h001.RK5Oi6fS77ngh8GWyZEykXZUicX4dxgxe7vLdv6Vxkk" style="text-decoration:none;"&gt;&lt;img width="22" height="16" alt="yt" border="0" style="display:block;max-width:22px;color:Dark" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/youtube_dark.png"/&gt;&lt;/a&gt;&lt;/td&gt;&lt;td&gt;&lt;span style="padding-left:1px;"&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td height="10" style="line-height:1px;font-size:1px;height:10px;"&gt; &nbsp; &lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="w" align="center" valign="top" style="padding:15px 15px 15px 15px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td align="center" valign="top"&gt;&lt;p style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"&gt; Update your email preferences or unsubscribe &lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxWc4htTObwdorovK0nFHVH-4pUdVE0ELYH5DsNemk732SjNwhPNJ25r0O8B5vYifsBhEpz-DJgyVFmavJPa0OyKRRnvw4o7XGyvIv7PRofnm8v7K0ZKJxTtlubPXyNPfsNHd8iRDYRoxTZIqds5st_guDFeK55ncEYqM3XBHpfZoygrWGfnmSTB5gtF4_nSt-Rn4FwN8qczyJ0iAXKlobg7UqZRz_kJ0SCiTv9nXG_pV3Ogk2Qs0tR5ckrcyogd0IlAxCVCQV_8JdQSuA9yozPxXCtfZnVNoqKSQirOfZWAKSUTmeTu_F4FMX9GixBE4P6VDmsOk-c3UuXpIKxZ7SpMHNcCcnHGis8T4ygMOZQdtL2VC45xMmxqP9CFPQA82w1laqgHN_HBnpzytTaGT_1nCgtcGRky7oS8b0BtdgIayKKF8s3FRnobpfa-GcSZfF6BYjvlYbjvdqEbojtuFlaea529F1_fDwMiB_uRnFx_1tGPuaybpWFl9-MP8Tp-z79fN0w85-NwBrBeCpr2CnvUkj0LbtKIGVmzglIgP-B14rJlqzPPJFQqoj6qXVXwWwwxmL_YhqmDejdf1JkIrcZhBe-rCfotFxnImyOHAq2Qpgrqs3LP0fAeh0jC7inqa5jpwgpSJRunHVGy6Fb-tBz6SWJLp13O4Y03IUMy-ZA70skK5rE4MRj8bTJtXofGrpOppP3xKm2uFE-C-AMnFwxa3Y4OoxXghsl1i5j5Y75j7Kl5VuaksyFr5Bh1NIgOy1G4XpLAQ_iLiMsyWFMKehCvEKccEOu_6SPrr8r8Wzud1B4neQ6kicydbWMI4t8PkxQ/4jy/vWJH3ApHQoazUFasn9oYhQ/h24/h001.XcWzB4OM6KFanr5Hrs7na2Xx4V86WAQrMpbs97eaNSE" style="text-decoration:underline;text-decoration-color:#FFFFFF!important;color:#FFFFFF!important;"&gt; here&lt;/a&gt;&lt;/p&gt;&lt;p class="copyright" style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"&gt; &copy; 2025 bycloudai &lt;/p&gt;&lt;p style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"&gt; 228 Park Ave S, #29976, New York, New York 10003, United States &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr style="display: table-row !important;"&gt;&lt;td align="center" valign="top" style="padding-top:20px;" style="display:table-cell !important;"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="display:table !important;"&gt;&lt;tr style="display:table-row !important;"&gt;&lt;td class="u" align="center" valign="middle" height="32" style="height:32px;display:table-cell !important; max-height: 32px !important;margin:0px !important; background-color: #ffffff !important;"&gt;&lt;a style="line-height:32px !important;text-decoration:none;display:block !important;" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28olDWFpV5DDKfdk_OdOKOg-W7V5DOZIY1cTde8f9uc4eAOkPDeOuUv72WIWSiVxkjCYp8PayaXOdViYUpcibzEVlXScD4EbpcU5gjavnG3BmLJqPC4XrwxdsmNIccsJa4VM_QJbvYUlkpFs-Wmo423kkYWNf7Tw9QVKNQ6HEBUD6cAp3aX-kUg2cqnUSqf9HIGtNZkv-JooglXJO1HvviO8T3RRs5irQU9k1W1Qu_dW/4jy/vWJH3ApHQoazUFasn9oYhQ/h25/h001.959AdJ97EZPw5y_fYTwa_awr5sZAadJ6Osg4goKa0sY"&gt;&lt;img src="https://media.beehiiv.com/output-onlinepngtools.png" width="16" alt="beehiiv logo" style="display:inline-block !important;max-width:16px !important; vertical-align:-3px !important;width: 16px !important;" border="0"/&gt;&lt;span style="padding-left:11px !important;display: inline-block !important;"&gt;Powered by beehiiv&lt;/span&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="left" valign="top" height="2" style="height:2px;"&gt;&lt;a href='https://elink4f7.mail.bycloud.ai/ss/c/u001.CxDkkVpJsBdVoe83c_tBWsHIaP4XNp0WgUYqLvHcKk_3uqk_KIkz4ddLinhFbud6JuxLFdSUhYnR7b1NSsmbtzXNGNblnEEMKUtkCAjkn8Y/4jy/vWJH3ApHQoazUFasn9oYhQ/h26/h001.bAotHnlYiJSjFXZcs1fUllEOpO0cN3tYigOuyc2bwYU' style="color: #2a2a2a !important; cursor: default; font-size: 1px; text-decoration: none;"&gt; Terms of Service &lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/div&gt;&lt;/body&gt;&lt;/html&gt;
Email Content