<!DOCTYPE html><html lang="en" xmlns="http://www.w3.org/1999/xhtml" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" style="font-size:16px;"><head><meta charset="utf-8"/><!--[if !mso]><!--><meta http-equiv="X-UA-Compatible" content="IE=edge"/><!--<![endif]--><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="x-apple-disable-message-reformatting"/><meta name="format-detection" content="telephone=no,address=no,email=no,date=no,url=no"/><meta name="color-scheme" content="light"/><meta name="supported-color-schemes" content="light"/><title>Language Modeling with Autoregressive U-Nets</title><!--[if mso]><xml><o:OfficeDocumentSettings><o:AllowPNG/><o:PixelsPerInch>96</o:PixelsPerInch></o:OfficeDocumentSettings></xml><![endif]--><style> :root { color-scheme: light; supported-color-schemes: light; } body { margin: 0; padding: 0; min-width: 100%!important; -ms-text-size-adjust: 100% !important; -webkit-transform: scale(1) !important; -webkit-text-size-adjust: 100% !important; -webkit-font-smoothing: antialiased !important; } .body { word-wrap: normal; word-spacing:normal; } table.mso { width: 100%; border-collapse: collapse; padding: 0; table-layout: fixed; } img { border: 0; outline: none; } table { mso-table-lspace: 0px; mso-table-rspace: 0px; } td, a, span { mso-line-height-rule: exactly; } #root [x-apple-data-detectors=true], a[x-apple-data-detectors=true], #MessageViewBody a { color: inherit !important; text-decoration: inherit !important; font-size: inherit !important; font-family: inherit !important; font-weight: inherit !important; line-height: inherit !important; } span.MsoHyperlink { color: inherit !important; mso-style-priority: 99 !important; } span.MsoHyperlinkFollowed { color: inherit !important; mso-style-priority: 99 !important; } .a { background-color:#dedede; } .b { background-color:#2a2a2a; } .c { background-color:#ffffff; } .d { background-color:#fff0c8; } .d2 { 
background-color:#FFFFFF; } .d3 { background-color:#FFFFFF; } h1 a { text-decoration:none;color:#2C81E5;font-style:italic; } h2 a { text-decoration:none;color:#2C81E5;font-style:italic; } h3 a { text-decoration:none;color:#2C81E5;font-style:italic; } h4 a { text-decoration:none;color:#2C81E5;font-style:italic; } h5 a { text-decoration:none;color:#2C81E5;font-style:italic; } h6 a { text-decoration:none;color:#2C81E5;font-style:italic; } h1, h1 a, h2, h2 a, h3, h3 a, h4, h4 a, h5, h5 a, h6, h6 a, ul, li, ol, p, p a { margin: 0;padding: 0; } h1 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:700;font-size:28px;color:#2A2A2A;line-height:42px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h2 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:700;font-size:24px;color:#2A2A2A;line-height:36px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h3 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:20px;color:#2A2A2A;line-height:30px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h4 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:18px;color:#2A2A2A;line-height:27px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h5 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:16px;color:#2A2A2A;line-height:24px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h6 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:14px;color:#2A2A2A;line-height:21px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } p { font-family:'Georgia','Times New 
Roman',serif;font-weight:400;color:#2D2D2D;font-size:16px;line-height:24px;padding-bottom:8px;padding-top:8px;mso-margin-top-alt:8px;mso-margin-bottom-alt:8px; } p a, .e a, ul a, li a, .h a, .h2 a, .h3 a { word-break:break-word;color:#2C81E5 !important;text-decoration:none;font-style:italic; } p a span, .e a span, ul a span, li a span { color: inherit } p .bold { font-weight:bold;color:#2D2D2D; } p span[style*="font-size"] { line-height: 1.6; } .f p { font-size:12px;line-height:15px;color:#2D2D2D;padding:0; } .f p a { color:#2D2D2D !important; } .g p { font-family:'Helvetica',Arial,sans-serif;font-size:14px;line-height:20px;font-weight:normal;margin:0; } .g p a { text-decoration: underline; } .i p { font-family:'Helvetica',Arial,sans-serif;line-height:23px;font-size:15px;color:#2D2D2D; } .i p a { color:#2D2D2D !important; } .i2 p { font-family:'Helvetica',Arial,sans-serif;line-height:23px;font-size:15px;color:#2D2D2D; } .i2 p a { color:#2D2D2D !important; } .i3 p { font-family:'Helvetica',Arial,sans-serif;line-height:43px;font-size:24px;color:#2D2D2D; } .i3 p a { color:#2D2D2D !important; } .h p a { color:#595959 !important; } .h2 p a { color:#595959 !important; } .h3 p a { color:#595959 !important; } .f p a, .i p a, .i2 p a, .i3 p a, .h p a, .h2 p a, .h3 p a { text-decoration:underline; } .j { border-top:3px solid #ffeb2d; } .k p { padding-left:15px;padding-bottom:0px;padding-top:6px;mso-margin-top-alt:6px;mso-margin-bottom-alt:0px;mso-margin-left-alt:15px; } .o { background-color:#FFFFFF;border:1px solid #F1F1F1;border-radius:5px; } .o p { font-family:'Helvetica',Arial,sans-serif;padding:0px;margin:0px; } .l p, .l p a { font-size:14px;line-height:20px;font-weight: bold;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; } .m p, .m p a { font-size:13px;line-height:18px;font-weight:400;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; } .n p, .n p a { 
font-size:12px;line-height:17px;font-weight:400;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; } .p { background-color:#FFFFFF;max-width:520px;border:1px solid #E1E8ED;border:1px solid rgba(80, 80, 80, 0.3);border-radius:5px; } .q { font-size:16px;font-family:Helvetica,Roboto,Calibri,sans-serif !important;border:1px solid #e1e8ed;border:1px solid rgba(80, 80, 80, 0.3);border-radius:10px;background-color:#FFFFFF; } .q p { font-size:16px;font-family:system-ui,Helvetica,Roboto,Calibri,sans-serif !important;color:#222222;padding:4px 0; } .r { border:1px solid #E1E8ED !important;border-radius:5px; } .s p { font-size: 14px; line-height: 17px; font-weight: 400; color: #697882; text-decoration: none; } .t p { font-family:'Helvetica',Arial,sans-serif;font-size:12px;line-height:18px;font-weight:400;color:#000000;font-style:italic;padding:4px 0px 0px; } .v { border-radius:10px;border:solid 0px #DFD150;background-color:#2C81E5;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;color:#FFFFFF; } .v a { text-decoration:none;display:block;color:#FFFFFF; } .w p { font-size:12px;line-height:15px;font-weight:400;color:#FFFFFF; } .w p a { text-decoration: underline !important;color:#FFFFFF !important; } ul { font-family:'Helvetica',Arial,sans-serif;margin:0px 0px 0px 25px !important;padding:0px !important;color:#2D2D2D;line-height:24px;list-style:disc;font-size:16px; } ul > li { font-family:'Helvetica',Arial,sans-serif;margin:10px 0px 0px 0px !important;padding: 0px 0px 0px 0px !important; color: #2D2D2D; list-style:disc; } ol { font-family:'Helvetica',Arial,sans-serif;margin: 0px 0px 0px 25px !important;padding:0px !important;color:#2D2D2D;line-height:24px;list-style:decimal;font-size:16px; } ol > li { font-family:'Helvetica',Arial,sans-serif;margin:10px 0px 0px 0px !important;padding: 0px 0px 0px 0px !important; color: #2D2D2D; list-style:decimal; } .e h3, .e p, .e span { 
padding-bottom:0px;padding-top:0px;mso-margin-top-alt:0px;mso-margin-bottom-alt:0px; } .e span, .e li { font-family:'Helvetica',Arial,sans-serif;font-size:16px;color:#2D2D2D;line-height:24px; } .rec { font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji" !important; } .rec__button:hover { background-color: #f9fafb !important; } .copyright a {color: inherit !important; text-decoration: none !important; font-size: inherit !important; font-family: inherit !important; font-weight: inherit !important; line-height: inherit !important;} .txt_social p { padding: 0; word-break: break-all; } .table, .table-c, .table-h { border: 1px solid #C0C0C0; } .table-c { padding:5px; background-color:#FFFFFF; } .table-c p { color: #2D2D2D; font-family:'Helvetica',Arial,sans-serif !important;overflow-wrap: break-word; } .table-h { padding:5px; background-color:#F1F1F1; } .table-h p { color: #2A2A2A; font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif !important;overflow-wrap: break-word; } @media only screen and (max-width:667px) { .aa { width: 100% !important; } .bb img { width: 100% !important; height: auto !important; max-width: none !important; } .cc { padding: 0px 8px !important; } .ee { padding-top:10px !important;padding-bottom:10px !important; } .ff ul, .ff ol { margin: 0px 0px 0px 10px !important;padding: 0px !important; } .ff li { margin:10px 0px 0px 10px !important; } .r {height:140px !important;} .s p { font-size:13px !important;line-height:15px !important; } .mob-hide {display:none !important;} .mob-stack {display:block !important;width:100% !important;} .mob-w-full {width:100% !important;} .mob-block {display:block !important;} .embed-img {padding:0px 0px 12px 0px !important;} .socialShare {padding-top:15px !important;} .rec { padding-left:15px!important;padding-right:15px!important; } .bodyWrapper { 
padding:7px 4px 7px 4px !important; } .social-mobile {float:left !important;margin-top:10px !important;} } @media screen and (max-width: 480px) { u + .a .gg { width: 100% !important; width: 100vw !important; } .tok-heart { padding-top:75% !important; } .tok-play { padding-top: 250px !important; } } @media screen and (max-width: 320px) { .tok-heart { padding-top:65% !important; } } .u { border: 1px solid #CACACA !important; border-radius: 2px !important; background-color: #ffffff !important; padding: 0px 13px 0px 13px !important; font-family:ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif !important;font-size: 12px !important; color: #767676 !important; } .u a { text-decoration: none; display: block !important; color: #767676 !important; margin: 0px !important; } .u span, .u img { color: #767676 !important;margin:0px !important; max-height:32px !important;background-color:#ffffff !important; } </style><!--[if mso]><style type="text/css"> h1, h2, h3, h4, h5, h6 {font-family: Arial, sans-serif !important;} body, table, td, p, a, span {font-family: Arial, sans-serif !important;} sup { font-size: 100% !important;vertical-align: .5em !important;mso-text-raise: -1.5% !important;line-height: 0 !important; } ul { margin-left:0px !important; margin-right:10px !important; margin-top:20px !important; margin-bottom:20px !important; } ul li { margin-left: 0px !important; mso-special-format: decimal; } ol { margin-left:0px !important; margin-right:10px !important; margin-top:20px !important; margin-bottom:20px !important; } ol li { margin-left: 0px !important; mso-special-format: decimal; } li.listItem { margin-left:15px !important; margin-top:0px !important; } .paddingDesktop { padding: 10px 0 !important; } .edm_outlooklist { margin-left: -20px !important; } .embedImage { display:none !important; } </style><![endif]--><style> @font-face { font-family: 'Open Sans'; font-style: normal; font-weight: 700; 
font-display: swap; src: url('https://fonts.gstatic.com/s/opensans/v40/memSYaGs126MiZpBA-UvWbX2vVnXBbObj2OVZyOOSr4dVJWUgsg-1x4gaVIUwaEQbjA.woff2') format('woff2'); } @font-face { font-family: 'Open Sans'; font-style: italic; font-weight: 700; font-display: swap; src: url('https://fonts.googleapis.com/css2?family=Open+Sans:ital,wght@1,700&display=swap') format('woff2'); } </style></head><body class="a" style="margin:0px auto;padding:0px;word-wrap:normal;word-spacing:normal;background-color:#dedede;"><div role="article" aria-roledescription="email" aria-label="email_name" lang="en" style="font-size:1rem"><div style="display:none;max-height:0px;overflow:hidden;"> plus more about LongLLaDA and MiniMax-M1  ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ </div><table role="none" width="100%" border="0" cellspacing="0" align="center" cellpadding="0" class="gg"><tr><td align="center" valign="top"><table role="none" width="670" border="0" cellspacing="0" cellpadding="0" class="aa" style="width:670px;table-layout:fixed;"><tr><td class="bodyWrapper" align="center" valign="top" style="padding:7px 7px 7px 7px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="center" valign="top" style="border-width:0px 0px 0px 0px;border-style: solid; border-color: #2a2a2a;border-radius:10px 10px 0px 0px;background-color:#ffffff;" class="c"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr id="header"><td style="padding:28px 28px 0px 28px;"><div style="padding-top:0px;padding-right:0px;padding-bottom:20px;padding-left:0px;"><table role="none" 
width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td class="f" align="right" valign="top"><p> June 24, 2025 | <a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxSdB5RCIH6yy1Fm1CYma3Ez9vmcuK72CmtLR8zRZkxPSbSoTkI_GcKe6t6eyQEsdjaEkck9u7AJlRrhDF2n08FlFFbUUlXlmEDWuagBCFys2FVlvrNEYC0FWH51ztO9fTcZ_kA25nc9-aU6iHFuvgnA-3CTLTfMDcoPCSfBj2q6I59C7VX-z98wdfgwFQ2M_Gam8Y6__hTEEWVNxIqzvf4kLUzg3B9VjIQyC9TsgtPhK/4hm/yQJr4kcVSrqnFGCYJezVZA/h0/h001.WkSF2DHoAkTuK9ayql_KXX8ydLRlLPQ9pjP-plvnZIQ">Read Online</a></p></td></tr><tr><td class="dd" align="center" valign="top" style="padding:15px 0;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="center" valign="top"><h1 style="text-align:left;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;font-weight:Bold;font-size:32px;color:#2A2A2A;padding:2px 0;line-height:38px;"> Language Modeling with Autoregressive U-Nets </h1><p style="text-align:left;font-family:'Helvetica',Arial,sans-serif;font-weight:normal;font-size:20px;color:#3E3E3E;padding:5px 0;line-height:24px;"> plus more about LongLLaDA and MiniMax-M1 </p></td></tr></table></td></tr><tr><td style="height:0px;width:0px;"><div style="height:1px;" data-open-tracking="true"> <img src="https://elink4f7.mail.bycloud.ai/ss/o/u001.3wmUuY8gEWd4_869a_eXcg/4hm/yQJr4kcVSrqnFGCYJezVZA/ho.gif" alt="" width="1" height="1" border="0" style="height:1px !important;width:1px !important;border-width:0 !important;margin-top:0 !important;margin-bottom:0 !important;margin-right:0 !important;margin-left:0 !important;padding-top:0 !important;padding-bottom:0 !important;padding-right:0 !important;padding-left:0 !important;"/> </div></td></tr></table></div></td></tr><tr id="content-blocks"><td class="email-card-body" align="center" valign="top" style="padding-bottom:28px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" 
align="center"><tr><td id="nov-18-th-nov-24-th-33-latest-ai-re" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h6 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:87.5%;"><i>June 16th ~ June 22nd</i><br><i>#61 Latest AI Research Explained Simply</i></h6></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"></p></td></tr></table></td></tr></table></td></tr><tr><td id="industry-news-in-1-line" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;">🗞️ Industry News in 1 Line</h2></td></tr><tr><td style="padding-bottom:12px;padding-left:50px;padding-right:40px;padding-top:12px;" class="ee"><div style="margin-left:0px;" class="edm_outlooklist"><ol start="1" style="list-style-type:decimal;margin:0px 0px;padding:0px 0px 0px 0px;"><li class="listItem ultext"><p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;">♥ SPONSORED</span></span> New <a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.VomAAYwkCjux8i_FMc4kJT8kaKnsv3E0yblPIE9wXwaVgWh8jFLxyjNLW2TKkmRfJXG07MfOPxNEw_XF3Puj2fCA4g-nY4b4JCNqm3NlJ1dSm0gJ0_iqGAhqv5WX-tie/4hm/yQJr4kcVSrqnFGCYJezVZA/h1/h001.pZNqCq2BtkNiiYm788PpBGudv-FGO3qXPv7Hm8sSXd0" target="_blank" rel="noopener noreferrer nofollow"><span>Warp 2.0 launch</span></a> introduces <b>an agentic development environment</b> that embeds top-ranked coding agents inside a GPU-accelerated 
terminal, letting developers prompt, debug, and ship in parallel threads. Currently <b>rank #1 on </b><a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28xv3mW_rbyWlcysEinRaYXHY85xtGAORNldy-C7r-Q37-kbNJyd1n6sZJxngXGzv2c2zWQlrRRgf8nYwYayzMI/4hm/yQJr4kcVSrqnFGCYJezVZA/h2/h001.fWlJnQ-kJIYnRSYnMnPmoellwlQHz0C0Nnvreu6w0Sw" target="_blank" rel="noopener noreferrer nofollow"><span><b>Terminal Bench</b></span></a> (outperforming Claude Code). Use code “<b>BYCLOUD</b>” for 1 month free Warp Pro. </p><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:540px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/3fd48ce9-5175-47af-b805-885deb228ec3/image.png?t=1750789371" alt="" height="auto" width="540" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:540px;"><p>rank #1 on <a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28xv3mW_rbyWlcysEinRaYUrt9EQcWCneWu2osNE_xGAAcEcbkqOXRFugBllhOQ10mQ8KPPn0umKeQK-Txha9eg/4hm/yQJr4kcVSrqnFGCYJezVZA/h3/h001.WpBd1Vd-PdQ7yJJe1wZyYDvKnHwEwfhAbtrsitZdq8A" target="_blank" rel="noopener noreferrer nofollow"><span>Terminal Bench</span></a></p></td></tr></table></li><li class="listItem ultext"><p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;">♥ 3.9k</span></span> Midjourney introduces their first video generation model called <a class="link" 
href="https://elink4f7.mail.bycloud.ai/ss/c/u001.fk326Sq7RNlKafJjT8dQYNYwzPBJEA8qNjvrTdUa_4GImuZ0FF7AT7ZACFQp4q4JhB9iwGZCwmXoTO-LcCkIFFo5AgzJtbGFxG_JZ5hIC7jcooDw15uvfhnucRf-Tdakp0j8d86vIM0Lkct3nw4VcxtoOQT836zlC4MLr0FNL-WK5oq8HnFjKQVqX8hf9ath/4hm/yQJr4kcVSrqnFGCYJezVZA/h4/h001.8fWrYgDpGWPcfzjdoBCcE2XgGav6qt-BieVOnV0cJ50" target="_blank" rel="noopener noreferrer nofollow"><span>V1 Video Model</span></a>. It is an extremely aesthetic video generator just like their image generators. You can try it out now for just $10 per month. </p><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:510px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.fk326Sq7RNlKafJjT8dQYNYwzPBJEA8qNjvrTdUa_4GImuZ0FF7AT7ZACFQp4q4JhB9iwGZCwmXoTO-LcCkIFFo5AgzJtbGFxG_JZ5hIC7jcooDw15uvfhnucRf-TdakpFpYR6itGmS80ZE3yRU1zPyhPXCgX41MjkcUa005khgLd3IoV0gvPNKZBpFcMrTz/4hm/yQJr4kcVSrqnFGCYJezVZA/h5/h001.mO39vvqPKUln-HmeIlgQuu_7IaCKVmcPcWmccHF317A" target="_blank" rel="noopener noreferrer nofollow" style="text-decoration:none;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/d7c78d4b-5633-4da7-a4a6-c1319e8171fb/PWSCVGJZRhTHHsXP-ezgif.com-video-to-gif-converter.gif?t=1750789521" alt="" height="auto" width="510" style="display:block;width:100%;" border="0"/></a></td></tr><tr><td align="center" valign="top" class="t" style="width:510px;"><p>V1 Video Model Demo</p></td></tr></table></li><li class="listItem ultext"><p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;">♥ 1.3k</span></span> Moonshot AI introduces <a class="link" 
href="https://elink4f7.mail.bycloud.ai/ss/c/u001.M6gksHPHLyr38XQsW29_hLepP9jjFF9T1SBpsfnBrNn9MZRNyWu1NHpvLkwT6BmPeUND0oLij88rlweFsxhy6b6YxqdLZG5lD8JnkctzCS8ztO4V88OTrXb4IkZqwVXWTqzh7bd1QhrsqWLabdqRbdsgkYOdH-WcQaW1IbxjAqY/4hm/yQJr4kcVSrqnFGCYJezVZA/h6/h001.F5hJ-peaoSv0BEvtuCjGNFe1ydbmli1h-D8CYWTlgFU" target="_blank" rel="noopener noreferrer nofollow"><span>Kimi-Researcher</span></a>, an autonomous LLM agent trained end-to-end with agentic RL that excels at multi-turn search and reasoning, topping <a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.1muhFWIqieRYpaJ-FbWSCWu5CwjAPDjn78pzGvEbEiH_kYIQPmeFG4e58T8TaGQHovElwXkqXj82kGwLe6FO_uppqYQw38_Hmlk1CtmmJDij_kIEKGurmxcmA8KqOPRv9Qs1BhmB7WHSmnW4N9BZgQ/4hm/yQJr4kcVSrqnFGCYJezVZA/h7/h001.KkNC3hkwAi8-xJy7ARxXW2mZkLUgOAIq8__mtz1VerY" target="_blank" rel="noopener noreferrer nofollow"><span>xbench-DeepSearch</span></a>. </p><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:510px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.M6gksHPHLyr38XQsW29_hLepP9jjFF9T1SBpsfnBrNn9MZRNyWu1NHpvLkwT6BmPeUND0oLij88rlweFsxhy6b6YxqdLZG5lD8JnkctzCS8m3uspbVgofYjNZr8qwvi_xCs4C0Lrrh0vHXHamu8OYTNLCNdSnJlkn-GwLcVzn4Q/4hm/yQJr4kcVSrqnFGCYJezVZA/h8/h001.sldt3bnjL1hqBaAXlsCvvo_33_uJQIhDToEyb7xgmI8" target="_blank" rel="noopener noreferrer nofollow" style="text-decoration:none;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/ed367bc0-7ff6-42b1-9c5d-2ad4be79b200/Gt5qIgmbwAM-RCX.jpg?t=1750789677" alt="" height="auto" width="510" style="display:block;width:100%;" border="0"/></a></td></tr><tr><td align="center" valign="top" class="t" style="width:510px;"><p>Kimi-Researcher Benchmark</p></td></tr></table></li><li class="listItem ultext"><p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"><span style="background-color:#e0e0e0;"><span 
style="color:rgb(255, 58, 58);font-size:0.6rem;">♥ 868</span></span> Prime Intellect introduces <a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.fUNb4GdFo9D3F8WuLArtoVvYoITUYCJ9fyjt2ZATcSvj8GuEwR35mcpO3h4HTkq_9mfpiIOAFwwzu8KVnJnTotpnAYDjBOEzZY268Gxwxz0I98JnbxbI6iKluqqUspo8F0bi3FbwYTR7c3RhnCMYeUdn1aGFNFQslK4zMIWoYRBgyxTtRlqoMVg11OS_J_-c/4hm/yQJr4kcVSrqnFGCYJezVZA/h9/h001.9_Xdrfk9WyXM7QpYb4v9YKs4qJkeG8zDPr3Vo7PAmzc" target="_blank" rel="noopener noreferrer nofollow"><span>SYNTHETIC-2</span></a>, a planetary-scale peer-to-peer synthetic dataset generation run across decentralized systems. They have the goal of releasing an open reasoning dataset on the toughest RL tasks via this generation run using crowd-sourced GPUs. </p><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:480px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.fUNb4GdFo9D3F8WuLArtoVvYoITUYCJ9fyjt2ZATcSvj8GuEwR35mcpO3h4HTkq_9mfpiIOAFwwzu8KVnJnTotpnAYDjBOEzZY268Gxwxz1L4iNASEMaWwck6RqtADmt9R6wTAqfJaLP6AglL4oALg0daBBVSs9u0-zRMRzWWobSHQHnafnc_4psxOakYHGZ/4hm/yQJr4kcVSrqnFGCYJezVZA/h10/h001.A7XZD4L62WioUuJaxHcdPNYeh-yQnZFATaeYvrNcRII" target="_blank" rel="noopener noreferrer nofollow" style="text-decoration:none;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/91b805f1-333d-414e-8f14-d5834b060ce1/Screenshot_2025-06-24_143844.png?t=1750790345" alt="" height="auto" width="480" style="display:block;width:100%;" border="0"/></a></td></tr><tr><td align="center" valign="top" class="t" style="width:480px;"><p>live SYNTHETIC-2 dashboard</p></td></tr></table></li></ol></div></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" 
cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"></p></td></tr></table></td></tr></table></td></tr><tr><td id="long-l-la-da-unlocking-long-context" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;">LongLLaDA: Unlocking Long Context Capabilities in Diffusion LLMs</h2></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style=""><i>Liu et al. [Fudan University, Shanghai Innovation Institute, Shanghai AI Lab]</i></span></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;"> ♥ 212 </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="background-color:#e0e0e0;"><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> Diffusion LM </span></span></p></td></tr><tr><td id="introduction-to-long-context-challe" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Introduction to Long-Context Challenges</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> In the past few weeks, we have seen an uptick in diffusion-based language models (dLLMs) and some newer models like <a class="link" 
href="https://elink4f7.mail.bycloud.ai/ss/c/u001.fUNb4GdFo9D3F8WuLArtoV5sElgytBlvJRzI9WtI92aYRNZBldoPXXQmyUYwRna0nsSCAsY9iU83L9jlZnqERoNt1z3l6BzRhQ7RMSGdpcXR-2U2ESzkl-uzmBUowhRz/4hm/yQJr4kcVSrqnFGCYJezVZA/h11/h001.XvOn6SP7ird3yBMYre4oNe8iXdqFEyJx1mi7EQAkkY8" target="_blank" rel="noopener noreferrer nofollow"><span>LLaDA </span></a>show promise in addressing limitations of auto-regressive models. These models offer benefits such as reversal-curse mitigation and multimodal adaptability; however, we don’t yet know how they handle long-context problems. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Traditional auto-regressive LLMs like LLaMA3 struggle badly when context exceeds their pre-trained length (e.g., 8k tokens). These models suffer perplexity spikes and retrieval failures. But diffusion LLMs show surprising stability in direct extrapolation. This paper investigates why and how we can extend their context capabilities systematically. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/9bd476e3-24d5-4519-b4ba-efe8ee21db4c/image.png?t=1750779359" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td id="stability-and-local-perception-in-l" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Stability and Local Perception in LLMs</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Diffusion LLMs use bidirectional attention during training, which exposes them to symmetric relative positions (e.g., [-4095, 4095] for a 4k context). This is very different from auto-regressive models, which see only forward positions (e.g., [0, 8191] for 8k). The Rotary Position Embedding (RoPE) in diffusion models captures complete sinusoidal periods for moderate frequencies, which lead to robust extrapolation. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> When the context surpasses training length, diffusion LLMs don’t collapse like auto-regressive counterparts. Instead, they show "local perception": focusing on recent segments. For instance, in Needle-In-A-Haystack (NIAH) tasks, LLaDA retrieves information from the latest 4k tokens even at 24k context, acting like a sliding window. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/7503cd5b-75f0-4249-9f20-56d2ea2a900f/image.png?t=1750779396" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> This behavior can also be modulated by taking more sampling steps. Increasing the number of steps (e.g., 16 vs. 4) extends retrievable depth slightly, but the fundamental limit remains tied to pre-training context. Visualization of QK states via t-SNE projections allowed the researchers to confirm that diffusion models maintain uniform position-embedding manifolds during extrapolation and avoid the distribution shifts seen in auto-regressive LLMs. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/a5a6eb55-2af8-4a74-b09c-4d5290b5224f/image.png?t=1750779426" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td id="extending-llm-context-with-long-l-l" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Extending LLM Context with LongLLaDA</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> After analyzing the stability of LLMs, the authors of this paper suggested LongLLaDA, which is a training-free method to extend context windows. This method adapts NTK-based RoPE extrapolation, which was originally designed for auto-regressive models, to diffusion LLMs. By calculating a scaling factor λ (e.g., λ=14 for 16k context) based on rotary base dimensions and training length, LongLLaDA adjusts position embeddings dynamically. At λ=14, LLaDA achieves near-perfect retrieval across 16k contexts, with the "local perception" effect scaling proportionally. Pushing further to λ=31 (24k) introduces a "lost-in-the-middle" pattern, indicating practical limits, while λ=55 (32k) fails. Crucially, scaling laws for auto-regressive models transfer seamlessly here. 
</p></td></tr><tr><td id="performance-and-task-specific-stren" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Performance and Task-Specific Strengths of LongLLaDA</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The researchers tested their models on various benchmarks and noticed some nuanced tradeoffs. On NIAH, the diffusion LLMs with LongLLaDA match auto-regressive performance within extended contexts (e.g., 96.4% accuracy at 16k). In LongBench and RULER evaluations: </p></td></tr><tr><td style="padding-bottom:12px;padding-left:50px;padding-right:40px;padding-top:12px;" class="ee"><div style="margin-left:0px;" class="edm_outlooklist"><ul style="font-weight:normal;list-style-type:disc;margin-bottom:12px !important;margin-top:12px !important;padding:0px 0px 0px 0px;"><li class="listItem ultext"><p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"><span style="font-weight:700;"><b>Retrieval tasks</b></span>: Diffusion LLMs perform comparably to auto-regressive models. </p></li><li class="listItem ultext"><p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"><span style="font-weight:700;"><b>Aggregation tasks (e.g., variable tracing)</b></span>: They lag significantly. </p></li><li class="listItem ultext"><p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"><span style="font-weight:700;"><b>QA and synthetic tasks</b></span>: They excel, outperforming LLaMA3 by up to 20% in accuracy. 
</p></li></ul></div></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> For example, LLaDA-8B-Instruct with λ=14 scored 88.9% on RULER’s QA subset at 8k context, surpassing LLaMA3’s 63.5%. However, aggregation tasks highlighted weaknesses, with scores dropping below 50%. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/ae8755be-5eb3-4040-91b4-6c7c131a46e5/image.png?t=1750779480" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28yjf9KIXZdsXoh1WlHvvKlq6Ua76-pHimz4CaW5rR_L1V9Lf7P-_KUt6J0RfLpVxeVlu0dqaBf0ojIDFaTZNMq6pe6S0LQqUMy72EMbgwTE/4hm/yQJr4kcVSrqnFGCYJezVZA/h12/h001.ETpf3Qi4IzXRbvGJCucBzaL6TRpaiHcE2ZmZA7PH8AY" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 
14px 14px 14px;text-decoration:none;"> Read Full Paper </a></td></tr></table></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"></p></td></tr></table></td></tr></table></td></tr><tr><td id="from-bytes-to-ideas-language-modeli" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;">From Bytes to Ideas: Language Modeling with Autoregressive U-Nets</h2></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style=""><i>Videau et al. 
[FAIR at Meta, TAU, INRIA and LISN, CNRS & Université Paris-Saclay, INSA Rouen Normandy]</i></span></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;"> ♥ 22k </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="background-color:#e0e0e0;"><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> LLM Architecture </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="background-color:#e0e0e0;"><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> bycloud’s pick </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span></p></td></tr><tr><td align="center" valign="top" class="dd"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.VomAAYwkCjux8i_FMc4kJYzfsuvUMTD4VlMZE84_IWZpGbG8lY6UBe_dbmYqpRLaUoh4a2knu_am7eH6ns9BA2jCo90x1G--GH4CIMGuq9eMRhZTAnHfpj_jeftFFxXbE2bysBZ3riJmsvCxtnJNU3QjCiRitwuNFlo11v-sC-o/4hm/yQJr4kcVSrqnFGCYJezVZA/h13/h001.i4BObxCFNDn71-U9SPyMtVxAUcuDKquY5vqJDq6onS4" style="text-decoration:none;" target="_blank"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center" style="margin-bottom:12px;margin-top:12px;padding-left:12px;padding-right:12px;"><tr><td align="center" valign="top" class="o" style="padding:12px 12px 12px 12px;;background-color:#FFFFFF;border-color:#F1F1F1;border-radius:5px 5px 5px 5px;border-width:1px 1px 1px 1px;"><table role="none" border="0" cellspacing="0" cellpadding="0" align="right" width="100%"><tr><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="embed-img mob-stack" align="center" valign="top" 
style="width:35%;min-height:100px;vertical-align:middle;display:none;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.VomAAYwkCjux8i_FMc4kJYzfsuvUMTD4VlMZE84_IWZpGbG8lY6UBe_dbmYqpRLaUoh4a2knu_am7eH6ns9BAydmzsLw7mIEu6yKNPf6aC48X1o7jDliTvVE0k0iDy9u4YcY6ytUHbw4c0N4H9CQGxSsVyZYupSolvQK71RxhcA/4hm/yQJr4kcVSrqnFGCYJezVZA/h14/h001.rEGBsK3ao9KHJHTXRbs_yscqGa-mjobppLauEm1WKt8" style="text-decoration:none;" target="_blank"><img src="https://opengraph.githubassets.com/dad0d549b1bb435dfe8514890ce442a5cc7d7b96a324c4d0f4b71eac843a2a1f/facebookresearch/lingua" width="100%" style="display:block;"/></a></td><!--[if mso]></tr></table></td><![endif]--></tr><tr><td align="center" valign="top" class="cc"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="left" valign="top" class="l"><p>lingua/apps/aunet at main · facebookresearch/lingua</p></td></tr><tr><td align="left" valign="top" class="m"><p>Meta Lingua: a lean, efficient, and easy-to-hack codebase to research LLMs. 
- facebookresearch/lingua</p></td></tr><tr><td align="left" valign="bottom" class="n" style="vertical-align:bottom;padding-top:12px;"><p style="word-break:break-word;">github.com/facebookresearch/lingua/tree/main/apps/aunet</p></td></tr></table></td><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="mob-hide" align="center" valign="top" style="width:35%;min-height:100px;padding:0px 0px 0px 12px;vertical-align:middle;"><img src="https://opengraph.githubassets.com/dad0d549b1bb435dfe8514890ce442a5cc7d7b96a324c4d0f4b71eac843a2a1f/facebookresearch/lingua" width="100%" style="display:block;"/></td><!--[if mso]></tr></table></td><![endif]--></tr></table></td></tr></table></a></td></tr><tr><td id="introduction-to-autoregressive-u-ne" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Introduction to Autoregressive U-Nets</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Language models usually start with a fixed tokenization step, where they chop text into predefined units like words or subwords. This approach locks in granularity early, forcing models to work within rigid boundaries. For example, a word-level tokenizer might handle "The quick" as two tokens, while a character-level one sees each letter separately. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> However, this inflexibility creates issues: rare tokens become harder to predict. Additionally, morphological relationships like "strawberry" and "strawberries" go unrecognized, and adapting to new languages or dialects is cumbersome. 
Byte-Pair Encoding (BPE) alleviates some problems but still relies on static embedding tables and a finite vocabulary. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/38c51627-0ab3-4c74-a3f8-967ce7f992f3/image.png?t=1750779748" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>Pooling selects the vectors at the positions specified by the splitting function.</p></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> To overcome these constraints, researchers introduced the Autoregressive U-Net (AU-Net). This method eliminates predefined tokenization by processing raw bytes directly. Instead of embedding tables, it uses attention mechanisms to build contextual representations dynamically. The architecture adapts to multiple levels of granularity, bytes, words, or word groups, creating a flexible hierarchy that evolves with the data. 
</p></td></tr><tr><td id="inner-workings-of-autoregressive-u-" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Inner Workings of Autoregressive U-Nets</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The AU-Net architecture uses a U-shaped structure with a contracting path and an expanding path. The contracting path compresses the input sequence progressively. At the first level, it pools information at user-defined split points, like spaces between words, to form higher-level representations. For instance, Stage 1 processes individual bytes, Stage 2 pools at word boundaries, and Stage 3 groups every two words. At each split point, vectors are selected and projected into the next stage’s dimensionality using linear layers. Crucially, self-attention ensures these vectors summarize all preceding context, capturing dependencies like word roots or semantic connections. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/cbe64132-323c-40f4-907e-28e6405e14e1/image.png?t=1750779786" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>AU-Net scaling w.r.t compute</p></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> After this, the skip connections bridge the contracting and expanding paths, preserving fine-grained details. During expansion, coarse vectors from deeper stages guide finer predictions. For example, a vector representing "The quick" might be upsampled to help predict "brown fox" at the byte level. During the upsampling stage, the model duplicates each coarse vector across its segment and applies position-specific linear transformations. This allows deeper stages, which activate less frequently, to influence spelling or phrasing without constant computation. 
</p></td></tr><tr><td id="evaluation-and-results-of-autoregre" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Evaluation and Results of Autoregressive U-Nets</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The AU-Net architecture was tested against BPE-based transformers and byte-level baselines across benchmarks like Hellaswag, MMLU, and GSM8K. At a 1B-parameter scale with 370B training tokens, AU-Net-4 scored <span style="font-weight:700;"><b>73.7% on Hellaswag</b></span> and <span style="font-weight:700;"><b>31.7% on MMLU</b></span>, outperforming BPE’s 70.2% and 27.0%. Multistage hierarchies consistently matched or exceeded baselines, with gains amplifying in reasoning-heavy tasks like ARC Challenge. Efficiency remained practical: AU-Net-2 processed 225K bytes/second on H100 GPUs, rivaling BPE’s 210K. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/330284b2-ffe1-4775-9360-5a51f934bc7d/image.png?t=1750779866" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Additionally, the AU-Net excelled in multilingual settings. 
For instance, on FLORES-200 translation, it improved BLEU scores for low-resource languages like Faroese (+1.2) and Limburgish (+4.6). In MMLU evaluations across 26 languages, it improved Romance and Germanic languages by 3-4 points, which demonstrates cross-linguistic transfer. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> However, there are still a few limitations as it underperforms on math-heavy GSM8K due to sparse training data and reliance on space-based splitting for Latin scripts. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.fUNb4GdFo9D3F8WuLArtoV5sElgytBlvJRzI9WtI92boQuqBUN0gs12ihtf0RThG_XMc_lFVnHHlNm0p6DPXElHv2kT4L6aS1SSv8jErq-CVqQZCKERXL71reGvldLL2/4hm/yQJr4kcVSrqnFGCYJezVZA/h15/h001.4Gtu1q0hm40n3Aot0bg5saR1YAZwm55lSVf_XzNNeXE" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"> Read Full Paper </a></td></tr></table></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" 
cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"></p></td></tr></table></td></tr></table></td></tr><tr><td id="mini-max-m-1-scaling-test-time-comp" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;">MiniMax-M1: Scaling Test-Time Compute Efficiently with Lightning Attention</h2></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style=""><i>MiniMax Team</i></span></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;"> ♥ 424 </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="background-color:#e0e0e0;"><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> LLM Training </span></span></p></td></tr><tr><td align="center" valign="top" style="padding:14px 32px 14px 32px;" class="dd"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j22z91kZt2uOBrE1Zkmkn47OtUH-686HQ0zLRiWBM7Bziejplqo7hZ6PKyHQh5sCf9Tbm1Kf9U2Nyz-ZYHxWt2adno0fjh-LjG4qyN_EHkuKa/4hm/yQJr4kcVSrqnFGCYJezVZA/h16/h001.yeW8P_yB0FF3fdGKYeXZ--xnoB9eMj5kxijuUdRYBA4" style="text-decoration:none;" target="_blank"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center" style="margin-bottom:12px;margin-top:12px;padding-left:12px;padding-right:12px;"><tr><td align="center" valign="top" class="o" style="padding:12px 12px 12px 12px;;background-color:#FFFFFF;border-color:#F1F1F1;border-radius:5px 5px 5px 5px;border-width:1px 1px 1px 1px;"><table role="none" border="0" cellspacing="0" cellpadding="0" align="right" 
width="100%"><tr><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="embed-img" align="center" valign="top" style="width:100%;min-height:100px;vertical-align:middle;padding:0px 0px 12px 0px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j22z91kZt2uOBrE1Zkmkn47OtUH-686HQ0zLRiWBM7BziIwPUM0LWMWFS11jLK_EKjPz-xWcEPMm4EPfPQCs1y35uGSrKy56vy83hp4G9MTQd/4hm/yQJr4kcVSrqnFGCYJezVZA/h17/h001.0N5I29ciFcOFIpasTGpyJyZXvO6N3Zqdo0ycW5AhmTI" style="text-decoration:none;" target="_blank"><img src="https://filecdn.minimax.chat/public/58eca777-e31f-448a-9823-e2220e49b426.png" width="100%" style="display:block;"/></a></td><!--[if mso]></tr></table></td><![endif]--></tr><tr><td align="center" valign="top" class="cc"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="left" valign="top" class="l"><p>MiniMax Official Website - Intelligence with everyone</p></td></tr><tr><td align="left" valign="top" class="m"><p>MiniMax is a leading global technology company and one of the pioneers of large language models (LLMs) in Asia. 
Our mission is to build a world where intelligence thrives with everyone.</p></td></tr><tr><td align="left" valign="bottom" class="n" style="vertical-align:bottom;padding-top:12px;"><p style="word-break:break-word;">www.minimax.io/news/minimaxm1</p></td></tr></table></td></tr></table></td></tr></table></a></td></tr><tr><td id="efficient-reasoning-at-scale-with-m" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Efficient Reasoning at Scale with MiniMax-M1</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Everyone knows that bigger LLMs are better, but creating bigger reasoning LLMs is really hard as language models often hit a computational wall. Traditional transformer architectures struggle with the quadratic complexity of attention mechanisms, which makes long-context tasks, such as processing million-token inputs or generating extensive reasoning chains, prohibitively expensive. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> This paper introduces the MiniMax-M1 architecture, which tackles this by rethinking efficiency from the ground up, combining a novel hybrid architecture with optimized training to slash computational costs while boosting performance in complex domains. 
</p></td></tr><tr><td align="center" valign="top" class="dd"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.VomAAYwkCjux8i_FMc4kJXVKJUDFWFdYMzacFEcbOUbjcEeEXc1dc-WeELrgV1e4wEDG2e-BmLvy2zmNv2V7rTHZNL2-g9uKLy6L4J51EvJiocwwE42i6X6AoMrM-LYF/4hm/yQJr4kcVSrqnFGCYJezVZA/h18/h001.bEzNTD0x-bPBiYrFY7e-z1yJ2gtVPNNf_fzC1hjVu8g" style="text-decoration:none;" target="_blank"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center" style="margin-bottom:12px;margin-top:12px;padding-left:12px;padding-right:12px;"><tr><td align="center" valign="top" class="o" style="padding:12px 12px 12px 12px;;background-color:#FFFFFF;border-color:#F1F1F1;border-radius:5px 5px 5px 5px;border-width:1px 1px 1px 1px;"><table role="none" border="0" cellspacing="0" cellpadding="0" align="right" width="100%"><tr><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="embed-img mob-stack" align="center" valign="top" style="width:35%;min-height:100px;vertical-align:middle;display:none;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.VomAAYwkCjux8i_FMc4kJXVKJUDFWFdYMzacFEcbOUbjcEeEXc1dc-WeELrgV1e4oPLLhkMtWFdSgcks_lxeb1oLmy35iU1mUNfYLUvgGAhF0g7lZ8wvplDFaT0ukAXp/4hm/yQJr4kcVSrqnFGCYJezVZA/h19/h001.KuadJ_fBF0PPANPUHH43yBP_DLFl9jzaX9AcANWBNlQ" style="text-decoration:none;" target="_blank"><img src="https://opengraph.githubassets.com/024e9c777026d0eab1c55005096fe38b5554380396e72d3ca0177b7bbef72eec/MiniMax-AI/MiniMax-M1" width="100%" style="display:block;"/></a></td><!--[if mso]></tr></table></td><![endif]--></tr><tr><td align="center" valign="top" class="cc"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="left" valign="top" class="l"><p>GitHub - MiniMax-AI/MiniMax-M1: MiniMax-M1, the world's first open-weight, large-scale hybrid-attention reasoning model.</p></td></tr><tr><td align="left" valign="top" class="m"><p>MiniMax-M1, 
the world's first open-weight, large-scale hybrid-attention reasoning model. - MiniMax-AI/MiniMax-M1</p></td></tr><tr><td align="left" valign="bottom" class="n" style="vertical-align:bottom;padding-top:12px;"><p style="word-break:break-word;">github.com/MiniMax-AI/MiniMax-M1</p></td></tr></table></td><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="mob-hide" align="center" valign="top" style="width:35%;min-height:100px;padding:0px 0px 0px 12px;vertical-align:middle;"><img src="https://opengraph.githubassets.com/024e9c777026d0eab1c55005096fe38b5554380396e72d3ca0177b7bbef72eec/MiniMax-AI/MiniMax-M1" width="100%" style="display:block;"/></td><!--[if mso]></tr></table></td><![endif]--></tr></table></td></tr></table></a></td></tr><tr><td id="inner-workings-of-mini-max-m-1" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Inner workings of MiniMax-M1</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The MiniMax-M1 uses a hybrid Mixture-of-Experts (MoE) foundation, which activates 45.9 billion of its 456 billion parameters per token. The main change in this approach is using the Lightning Attention, an I/O-aware linear attention variant integrated into a hybrid block design: every eight Lightning Attention blocks alternate with one traditional softmax attention block. This hybrid approach reduces FLOPs dramatically for long sequences, and uses just 25% of the compute compared to models like DeepSeek R1 at 100K-token generations. Lightning Attention also natively supports 1M-token contexts, which is eight times larger than most competitors. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/0d84a738-3c8a-4f81-a858-909d8c46f8ee/image.png?t=1750780109" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The training process used a three-stage pipeline. First, continual pretraining on 7.5 trillion tokens of STEM- and reasoning-heavy data strengthened its foundational capabilities. Next, the supervised fine-tuning step injected chain-of-thought patterns. The final phase used a novel RL algorithm called CISPO (Clipped Importance Sampling Policy Optimization). Unlike prior methods that clip token updates, CISPO stabilizes training by clipping importance sampling weights, which preserves gradient flow for low-probability "fork" tokens critical to reasoning. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> After this, the researchers added a few architectural tweaks that were essential for RL stability. A precision mismatch between training and inference kernels was resolved by switching the LM head to FP32. Next, they tuned the optimizer settings (β1=0.9, β2=0.95, ε=1e-15) to accommodate extreme gradient ranges, while repetition detection truncated degenerate outputs early. Together, these enabled full RL training on 512 H800 GPUs in just three weeks. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/3882b5af-808c-47f9-8cd7-dec2140a842c/653801bb-dd43-402b-bd8d-228242abf3f2.png?t=1750779996" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td id="evaluation-and-benchmark-results-of" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Evaluation and Benchmark Results of MiniMax-M1</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The MiniMax-M1 model <span style="font-weight:700;"><b>outperforms leading open-weight models</b></span> (DeepSeek-R1, Qwen3-235B) across software engineering, tool use, and long-context tasks. In agentic benchmarks like TAU-Bench, it surpassed Gemini 2.5 Pro, while <span style="font-weight:700;"><b>outperforming OpenAI o3</b></span> and Claude 4 Opus in long-context understanding. However, it is slightly behind DeepSeek-R1-0528 in math and coding competitions, which highlights a trade-off between specialization and versatility. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/4809daa9-7808-4d7d-82d0-fcd6b6fb3301/d50018df-d646-40e3-824b-e507c209ad03.png?t=1750780011" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28yjf9KIXZdsXoh1WlHvvKlXtZsz3wDauTFzwXUUsTbKX4DvsM6gUryOlAmf_tU8zTBWqkJL9nJS3lhRW34F5kon88bu4mRvAAK1N9biW4If/4hm/yQJr4kcVSrqnFGCYJezVZA/h20/h001.MCDUGzRv1h1IJUGLWoAEGHYK5fp-sffwf8iveAWMsR4" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"> Read Full Paper </a></td></tr></table></td></tr><tr><td align="center" valign="top"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="center" valign="top" style="font-size:0px;line-height:0px;padding:30px 0px 30px;" class="dd"><table 
class="j" role="none" width="50%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td> </td></tr></table></td></tr><tr><td class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;">Share The AI Timeline</h2></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> You currently have <strong>0</strong> referrals. </p></td></tr><tr><td align="left" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; display:none;width:0px;max-height:0px;overflow:hidden;mso-hide:all;height:0;font-size:0;max-height:0;line-height:0;margin:0 auto;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 0;"><tr><td align="center" valign="top" style="width:300px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxWc4htTObwdorovK0nFHVH-4pUdVE0ELYH5DsNemk732SjNwhPNJ25r0O8B5vYifsGNUqyW5TiZkyMsF1yreu0byy2KW36J1wDdpoLuXg2TU1F1OW8OHoHaU4-ZmrZpPU4RN-crQCEimD190CSn9fPvQfHEYx7CTSXIidbla7LLK/4hm/yQJr4kcVSrqnFGCYJezVZA/h21/h001.Fsa3jacvHMAkukGdgvoDKlbaiNziWvvt_9spXhW--pk" rel="noopener noreferrer nofollow" style="text-decoration:none;" target="_blank"><img src="" alt="" height="auto" width="300" style="display:block;width:100%;" border="0"/></a></td></tr></table></td></tr><tr><td align="left" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:left;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="left" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 
0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxWc4htTObwdorovK0nFHVH-4pUdVE0ELYH5DsNemk732SjNwhPNJ25r0O8B5vYifsGNUqyW5TiZkyMsF1yreu0byy2KW36J1wDdpoLuXg2TU1F1OW8OHoHaU4-ZmrZpPU4RN-crQCEimD190CSn9fPvQfHEYx7CTSXIidbla7LLK/4hm/yQJr4kcVSrqnFGCYJezVZA/h22/h001.-OWnJJPgzkYz7S1pfCwC60QVxiZacPe8sG48lF9xH6k" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"> Click to Share </a></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Or copy and paste this link to others: <a class="link" href="https://mail.bycloud.ai/subscribe?ref=6SqUHb8KiF&_bhlid=bf7a73b936aab597b0df9777ef50b28c5a049d32" target="_blank" rel="noopener noreferrer nofollow" clicktracking="off"><span>https://mail.bycloud.ai/subscribe?ref=6SqUHb8KiF</span></a></p></td></tr><tr><td align="center" valign="top" style="font-size:0px;line-height:0px;padding:30px 0px 30px;" class="dd"><table class="j" role="none" width="50%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td> </td></tr></table></td></tr></table></td></tr><tr><td class="dd" align="center" valign="top" style="padding:20px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.amatuKKICSickUKplYJXmCcGxcre9qGrlwGwf1j9N2X_eOpAu08jMSRbj51YDi67vclJUOG8T3kwoh7DXm3KwfvVUuacuJo5fNiAisrD4qkHqYjIV51WOBUlEHX4hsOT/4hm/yQJr4kcVSrqnFGCYJezVZA/h23/h001.W3BXVIjk2KCMXhX9UkXXRN3A2ciUBzOMmMmRqfttlBE" style="text-decoration:none;"><table align="center" width="100%" cellpadding="0" cellspacing="0" border="0" role="none" style="max-width:520px;margin:0 auto;"><tr><td class="p" width="100%" style="padding:2px;border:none;"><table width="100%" cellpadding="0" 
cellspacing="0" border="0" role="none"><tr><td align="center" valign="top" style="width:100%;"><div style="max-height:0;position:relative;opacity:0.999;width:100%;mso-hide:all;"><div style="display:inline-block;width:100%;padding-top:25%;"><img width="20%" height="auto" loading="lazy" alt="" style="border:0;" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/youtube_play_icon.png"/></div></div><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.amatuKKICSickUKplYJXmCcGxcre9qGrlwGwf1j9N2Wlt_X_Sis9LZwQcE9KstynlBf6guzEjbHdD0AHPbYFAE5qFLwRmj4bC5l2BEhJOlmmtJ85jx3Kz7-a8pWCbkSS/4hm/yQJr4kcVSrqnFGCYJezVZA/h24/h001.T39wGDdBJRyvqi9yCeK_hEk-5nRv2Laz76QknvEtlvI" style="text-decoration:none;"><img src="https://i.ytimg.com/vi/z3awgfU4yno/maxresdefault.jpg" width="480" height="auto" loading="lazy" alt="YouTube video by bycloud" style="display:block;height:auto;border:0;outline:none;text-decoration:none;background-color:#000000;width:100%;"/></a></td></tr><tr><td><p style="font-size:12px;font-weight:500;font-style:italic;font-family:Helvetica, Calibri, sans-serif;color: #686a6d; padding-top:0 !important;padding-bottom:6px !important; padding-left:4px !important;"> The LLM's RL Revelation We Didn't See Coming </p></td></tr></table></td></tr></table></a></td></tr><tr><td class="dd" style="padding: 20px;"><table width="100%" cellpadding="0" cellspacing="0" role="none" style="max-width:520px;margin:0 auto;"><tr><td class="q" style="padding:16px 16px 6px 16px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.tLfGW26lAwaS9gFg17HSoDDFT6eh5Nsg0xYVQj-h6I3o9m2k79_qw4izMYhmcI36CrZOG5cfABsIzpuLXBrt9iOeaDPiM5b50x0emVRb-3xmN9FZ7x9KvfX5xJKZcZNTZtt5ty86k0rQuatJLmo1uYJCMhmUfEnVXT5MsVknUZk/4hm/yQJr4kcVSrqnFGCYJezVZA/h25/h001.VK4Xg2uNBQX4fZ8CeGg8GkSUML2Ip-v61Nze0AGhVmI" style="text-decoration:none !important;"><table width="100%" cellpadding="0" cellspacing="0" border="0" role="none"><tr><td width="100%" style="padding: 0 0 14px 
0;text-decoration:none;width:100%;"><table width="100%" cellpadding="0" cellspacing="0" border="0" role="none"><tr><td width="36" style="width:36px;"><img src="https://pbs.twimg.com/profile_images/1698572487909400576/BvncwnrP_normal.jpg" alt="tw profile: The AI Timeline" style="display:block;width:36px;height:36px;border-radius:50%;border:0;"/></td><td width="400" style="padding:0 0 0 8px;text-decoration:none;"><span style="display:block;font-size:14px;color:#1c2022;font-weight:700;"> The AI Timeline </span><span style="display:block;color:#697882;font-size:14px;"> @TheAITimeline </span></td><td width="24" align="right" style="vertical-align:text-top;"><img width="24" height="24" loading="lazy" alt="tw" style="border:0;" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/x_logo.png"/></td></tr></table></td></tr><tr></tr><tr><td style="word-break:break-word;"><p>🚨This week's top AI/ML research papers:</p><p>- From Bytes to Ideas <br>- MiniMax-M1 <br>- LongLLaDA <br>- Reasoning with Exploration <br>- RLVR Implicitly Incentivizes Correct Reasoning in Base LLMs <br>- Truncated Proximal Policy Optimization <br>- Direct Reasoning Optimization <br>- AceReason-Nemotron 1.1 <br>-</p></td></tr><tr><td style="padding:12px 0 0 0;"></td></tr><tr><td align="center" style="padding:8px 0 0 0;width:480px;"><img src="https://pbs.twimg.com/media/GuG1JzgWsAA72zF.jpg" alt="Image attached to the tweet" width="480" height="auto" style="display:block;border:1px solid #E1E8ED;border-radius:5px;width:100%;max-width:480px;height:auto;"/></td></tr><tr><td height="8" style="line-height:1px;font-size:1px;height:8px;"> </td></tr><tr><td align="left" valign="top" class="s"><p>6:06 AM • Jun 23, 2025</p></td></tr><tr><td height="10" style="line-height: 1px; font-size: 1px; height: 10px;"> </td></tr><tr><td height="1" bgcolor="#e1e8ed" style="line-height:0px;font-size:0px;height:1px;"></td></tr><tr><td height="10" style="line-height:1px;font-size:1px;height:10px;"> 
</td></tr><tr><td align="left" valign="top" class="s"><p><b style="color:#1C2022">533</b> Likes <b style="color:#1C2022">52</b> Retweets </p></td></tr><tr><td align="left" valign="top" class="s"><div align="center" style="text-align:center;margin-top:4px;margin-bottom:4px;padding:8px;border:1px solid #ccd6dd;border-radius:9999px;color:#1B95E0"><b>3 Replies</b></div></td></tr></table></a></td></tr></table></td></tr></table></td></tr></table></td></tr><tr><td align="center" valign="top"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td><tr><td class="b" align="center" valign="top" bgcolor="#2a2a2a" style="padding:0px 0px 0px 0px;border-style:solid;border-width: 0px 0px 0px 0px;border-color: #2a2a2a;border-bottom-left-radius:10px;border-bottom-right-radius:10px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="center" valign="top" bgcolor="#73ddff" style="padding:12px"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td><span style="padding-left:1px;"></span></td><td align="center" valign="middle" width="75" style="width:75px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.1muhFWIqieRYpaJ-FbWSCQqcWoV4NNHHr5SkP9THApWuHAAlWLQxI3Q_IqFmt_DcyAxeC8jDApCnHmMSBGpBb5sgtimvBYgxRX-Rp7s0F3LjCHoSwdhr83OBqRFhJ1y_/4hm/yQJr4kcVSrqnFGCYJezVZA/h26/h001.pnt2z6ij4yarqhcNLkDMHbSL86J1hHI9n6tqqG_a62Q" style="text-decoration:none;"><img width="22" height="22" alt="tw" border="0" style="display:block;max-width:22px;color:Dark" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/x_dark.png"/></a></td><td align="center" valign="middle" width="75" style="width:75px;"><a 
href="https://elink4f7.mail.bycloud.ai/ss/c/u001.amatuKKICSickUKplYJXmBoQnQ9VXnB2zTxBG4HeHBgjMqVxpoXRdj01cjwyoVlHgiebEOgBvwHtevoVpsSvpn3Q1di2ml6sb3cBM-X6IStQbj_zQSVGWJ8AAmPw2en2/4hm/yQJr4kcVSrqnFGCYJezVZA/h27/h001.CNNJWyJZEy4POayGVSMA8HCgnIBq__forfr2ct0LcbQ" style="text-decoration:none;"><img width="22" height="16" alt="yt" border="0" style="display:block;max-width:22px;color:Dark" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/youtube_dark.png"/></a></td><td><span style="padding-left:1px;"></span></td></tr></table></td></tr><tr><td height="10" style="line-height:1px;font-size:1px;height:10px;"> </td></tr><tr><td class="w" align="center" valign="top" style="padding:15px 15px 15px 15px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="center" valign="top"><p style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"> Update your email preferences or unsubscribe <a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxWc4htTObwdorovK0nFHVH-4pUdVE0ELYH5DsNemk732SjNwhPNJ25r0O8B5vYifsBhEpz-DJgyVFmavJPa0OyKRRnvw4o7XGyvIv7PRofnmUBcmrVEcs2seidcITBGssEX3uyVw_5Md0HUGe_2LZp8S42x7K-lEsqCnhszMUaC7x1mK1FuEmZ4Ra0AzGoCPIjUbCuqS23VcY3e0siIiEmI9fdSE6OjfRDK5IhQwXTy2ngSIcuCNktNK5IIZcwUoYjYz-F5KokIz3kzl5v23_Np2_njiWNxIJCbTcHaCobzao6BPAbIr1HFNi4-ErQPVDY3H1PFVxmpUEKrxmermmTp4Q37MA0gnMn5VQcHLju-xybL0qiUQB1rtT3aRCDTXEHk-MG3Craeznd6rHC-8gCU/4hm/yQJr4kcVSrqnFGCYJezVZA/h28/h001.lsjOcgjy-vy7YVNE3xn-EkMaTs17iJ5YXXNfGsps6-k" style="text-decoration:underline;text-decoration-color:#FFFFFF!important;color:#FFFFFF!important;"> here</a></p><p class="copyright" style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"> © 2025 bycloudai </p><p style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"> 228 Park Ave S, #29976, New York, New York 10003, United States </p></td></tr><tr style="display: table-row 
!important;"><td align="center" valign="top" style="padding-top:20px;display:table-cell !important;"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="display:table !important;"><tr style="display:table-row !important;"><td class="u" align="center" valign="middle" height="32" style="height:32px;display:table-cell !important; max-height: 32px !important;margin:0px !important; background-color: #ffffff !important;"><a style="line-height:32px !important;text-decoration:none;display:block !important;" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28olDWFpV5DDKfdk_OdOKOjJs6R3TV24vOpbqKnKbkRVIz3avAJN6SuYgQDK6pVFObjKxGhJpKYFJxya6mjJIbNw7CMR0tPYxq5fRa9xwYxYVce58ezOUz0yPprxIv1i48h1P-PhNhdQhQhFShkA6x0rJ4hFuUFPo_5-KjV-XkLwjVCyFdRiSBwD-nEE2lmPGr0elpEAjSq464Z2p_Dilx459kbdc265cjk2u2jgktgM/4hm/yQJr4kcVSrqnFGCYJezVZA/h29/h001.fSH2dt0NlxpxqrHKJOKfS-YBz1HVwAwvFvkClA0lpLI"><img src="https://media.beehiiv.com/output-onlinepngtools.png" width="16" alt="beehiiv logo" style="display:inline-block !important;max-width:16px !important; vertical-align:-3px !important;width: 16px !important;" border="0"/><span style="padding-left:11px !important;display: inline-block !important;">Powered by beehiiv</span></a></td></tr></table></td></tr><tr><td align="left" valign="top" height="2" style="height:2px;"><a href='https://elink4f7.mail.bycloud.ai/ss/c/u001.CxDkkVpJsBdVoe83c_tBWsHIaP4XNp0WgUYqLvHcKk_3uqk_KIkz4ddLinhFbud6JuxLFdSUhYnR7b1NSsmbtzXNGNblnEEMKUtkCAjkn8Y/4hm/yQJr4kcVSrqnFGCYJezVZA/h30/h001.WgEI5PkVQyCzar-kWKWCVajxW_RxFC2Vo9qESgOLJwY' style="color: #2a2a2a !important; cursor: default; font-size: 1px; text-decoration: none;"> Terms of Service </a></td></tr></table></td></tr></table></td></tr></td></tr></table></td></tr></table></td></tr></table></td></tr></table></div></body></html>