Email: A Shocking RLVR Revelation For LLM Just Dropped

Subject:
A Shocking RLVR Revelation For LLM Just Dropped
Received:
6/3/2025, 7:56:16 PM
From:
"The AI Timeline" <weekly@mail.bycloud.ai>
To:
desert.glitter.81@getmynews.app
&lt;!DOCTYPE html&gt;&lt;html lang="en" xmlns="http://www.w3.org/1999/xhtml" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" style="font-size:16px;"&gt;&lt;head&gt;&lt;meta charset="utf-8"/&gt;&lt;!--[if !mso]&gt;&lt;!--&gt;&lt;meta http-equiv="X-UA-Compatible" content="IE=edge"/&gt;&lt;!--&lt;![endif]--&gt;&lt;meta name="viewport" content="width=device-width,initial-scale=1"/&gt;&lt;meta name="x-apple-disable-message-reformatting"/&gt;&lt;meta name="format-detection" content="telephone=no,address=no,email=no,date=no,url=no"/&gt;&lt;meta name="color-scheme" content="light"/&gt;&lt;meta name="supported-color-schemes" content="light"/&gt;&lt;title&gt;A Shocking RLVR Revelation For LLM Just Dropped&lt;/title&gt;&lt;!--[if mso]&gt;&lt;xml&gt;&lt;o:OfficeDocumentSettings&gt;&lt;o:AllowPNG/&gt;&lt;o:PixelsPerInch&gt;96&lt;/o:PixelsPerInch&gt;&lt;/o:OfficeDocumentSettings&gt;&lt;/xml&gt;&lt;![endif]--&gt;&lt;style&gt;
  /* Declare a light-only colour scheme for the whole document. */
  :root {
    color-scheme: light;
    supported-color-schemes: light;
  }

  /* Page-level reset: no outer spacing, full-width minimum, fixed text scaling. */
  body {
    margin: 0;
    padding: 0;
    min-width: 100%!important;
    -ms-text-size-adjust: 100% !important;
    -webkit-transform: scale(1) !important;
    -webkit-text-size-adjust: 100% !important;
    -webkit-font-smoothing: antialiased !important;
  }

  .body {
    word-wrap: normal;
    word-spacing:normal;
  }

  /* Full-width, fixed-layout table variant. */
  table.mso {
    width: 100%;
    border-collapse: collapse;
    padding: 0;
    table-layout: fixed;
  }

  img {
    border: 0;
    outline: none;
  }

  /* mso-* properties below are vendor extensions; presumably only Outlook/Word honours them. */
  table {
    mso-table-lspace: 0px;
    mso-table-rspace: 0px;
  }

  td, a, span {
    mso-line-height-rule: exactly;
  }

  /* Links matched by these client-specific hooks inherit every text property
     from their parent instead of carrying their own link styling. */
  #root [x-apple-data-detectors=true],
  a[x-apple-data-detectors=true],
  #MessageViewBody a {
    color: inherit !important;
    text-decoration: inherit !important;
    font-size: inherit !important;
    font-family: inherit !important;
    font-weight: inherit !important;
    line-height: inherit !important;
  }

  /* Hyperlink span classes (unvisited and followed) take the surrounding colour. */
  span.MsoHyperlink,
  span.MsoHyperlinkFollowed {
    color: inherit !important;
    mso-style-priority: 99 !important;
  }
  /* Background-colour helpers. In this message .a is the class on the body
     element and .c is on the white card cell. */
  .a { background-color:#dedede; }
  .b { background-color:#2a2a2a; }
  .c  { background-color:#ffffff; }
  .d { background-color:#fff0c8; }
  .d2 { background-color:#FFFFFF; }
  .d3 { background-color:#FFFFFF; }
  /* Links inside any heading level: no underline, blue, italic. */
  h1 a { text-decoration:none;color:#2C81E5;font-style:italic; }
  h2 a { text-decoration:none;color:#2C81E5;font-style:italic; }
  h3 a { text-decoration:none;color:#2C81E5;font-style:italic; }
  h4 a { text-decoration:none;color:#2C81E5;font-style:italic; }
  h5 a { text-decoration:none;color:#2C81E5;font-style:italic; }
  h6 a { text-decoration:none;color:#2C81E5;font-style:italic; }
  /* Zero margin and padding on text elements; the rules that follow add padding back. */
  h1, h1 a, h2, h2 a, h3, h3 a, h4, h4 a, h5, h5 a, h6, h6 a, ul, li, ol, p, p a { margin: 0;padding: 0; }
  /* Heading scale: font-size steps 28/24/20/18/16/14px with line-height at 1.5x;
     h1 and h2 are weight 700, h3 to h6 weight 400. The mso-margin-*-alt values
     repeat the padding-top/padding-bottom amounts. */
  h1 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:700;font-size:28px;color:#2A2A2A;line-height:42px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px }
  h2 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:700;font-size:24px;color:#2A2A2A;line-height:36px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px }
  h3 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:20px;color:#2A2A2A;line-height:30px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px }
  h4 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:18px;color:#2A2A2A;line-height:27px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px }
  h5 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:16px;color:#2A2A2A;line-height:24px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px }
  h6 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:14px;color:#2A2A2A;line-height:21px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px }
  /* Body copy: Georgia serif stack, 16px on a 24px line, 8px padding above and below. */
  p { font-family:'Georgia','Times New Roman',serif;font-weight:400;color:#2D2D2D;font-size:16px;line-height:24px;padding-bottom:8px;padding-top:8px;mso-margin-top-alt:8px;mso-margin-bottom-alt:8px; }
  /* Default inline link look; the colour is marked !important, so later
     per-section link colours also need !important to replace it. */
  p a, .e a, ul a, li a, .h a, .h2 a, .h3 a { word-break:break-word;color:#2C81E5 !important;text-decoration:none;font-style:italic; }
  /* Spans wrapped inside those links take the link colour. */
  p a span, .e a span, ul a span, li a span { color: inherit }
  p .bold { font-weight:bold;color:#2D2D2D; }
  /* Any span with an inline font-size gets a unitless 1.6 line-height. */
  p span[style*="font-size"] { line-height: 1.6; }
  /* .f: small 12px text; in this message it is on the header cell holding the
     date and the "Read Online" link. */
  .f p { font-size:12px;line-height:15px;color:#2D2D2D;padding:0; }
  .f p a { color:#2D2D2D !important; }
  /* .g / .i / .i2 / .i3: Helvetica text blocks at 14px, 15px, 15px and 24px. */
  .g p { font-family:'Helvetica',Arial,sans-serif;font-size:14px;line-height:20px;font-weight:normal;margin:0; }
  .g p a  { text-decoration: underline; }
  .i p { font-family:'Helvetica',Arial,sans-serif;line-height:23px;font-size:15px;color:#2D2D2D; }
  .i p a { color:#2D2D2D !important; }
  .i2 p { font-family:'Helvetica',Arial,sans-serif;line-height:23px;font-size:15px;color:#2D2D2D; }
  .i2 p a { color:#2D2D2D !important; }
  .i3 p { font-family:'Helvetica',Arial,sans-serif;line-height:43px;font-size:24px;color:#2D2D2D; }
  .i3 p a { color:#2D2D2D !important; }
  /* .h / .h2 / .h3: grey (#595959) links. */
  .h p a { color:#595959 !important; }
  .h2 p a { color:#595959 !important; }
  .h3 p a { color:#595959 !important; }
  /* All of the section links above are underlined. */
  .f p a, .i p a, .i2 p a, .i3 p a, .h p a, .h2 p a, .h3 p a { text-decoration:underline; }
  .j { border-top:3px solid #ffeb2d; }
  .k p { padding-left:15px;padding-bottom:0px;padding-top:6px;mso-margin-top-alt:6px;mso-margin-bottom-alt:0px;mso-margin-left-alt:15px; }
  /* .o: white bordered box; .l / .m / .n are three text sizes (14/13/12px)
     sharing colour, bottom padding and no link underline. */
  .o { background-color:#FFFFFF;border:1px solid #F1F1F1;border-radius:5px; }
  .o p { font-family:'Helvetica',Arial,sans-serif;padding:0px;margin:0px; }
  .l p,
  .l p a { font-size:14px;line-height:20px;font-weight: bold;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; }
  .m p,
  .m p a { font-size:13px;line-height:18px;font-weight:400;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; }
  .n p,
  .n p a { font-size:12px;line-height:17px;font-weight:400;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; }
  /* .p and .q declare border twice: a hex colour first, then an rgba colour.
     The later declaration wins wherever it parses; presumably the hex value is
     kept as a fallback, so both must stay in this order. */
  .p { background-color:#FFFFFF;max-width:520px;border:1px solid #E1E8ED;border:1px solid rgba(80, 80, 80, 0.3);border-radius:5px; }
  .q { font-size:16px;font-family:Helvetica,Roboto,Calibri,sans-serif !important;border:1px solid #e1e8ed;border:1px solid rgba(80, 80, 80, 0.3);border-radius:10px;background-color:#FFFFFF; }
  .q p { font-size:16px;font-family:system-ui,Helvetica,Roboto,Calibri,sans-serif !important;color:#222222;padding:4px 0; }
  .r { border:1px solid #E1E8ED !important;border-radius:5px; }
  .s p { font-size: 14px; line-height: 17px; font-weight: 400; color: #697882; text-decoration: none; }
  /* .t: small italic caption text; in this message it is on the cells under images. */
  .t p { font-family:'Helvetica',Arial,sans-serif;font-size:12px;line-height:18px;font-weight:400;color:#000000;font-style:italic;padding:4px 0px 0px; }
  /* .v: rounded blue block with white text; its link fills the block (display:block). */
  .v { border-radius:10px;border:solid 0px #DFD150;background-color:#2C81E5;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;color:#FFFFFF; }
  .v a { text-decoration:none;display:block;color:#FFFFFF; }
  /* .w: small white text with white underlined links. */
  .w p { font-size:12px;line-height:15px;font-weight:400;color:#FFFFFF; }
  .w p a { text-decoration: underline !important;color:#FFFFFF !important; }
  /* Lists: Helvetica 16px on a 24px line, 25px left margin, no padding; each
     item gets a 10px top margin. The child combinator in the two item rules is
     stored entity-escaped, like the rest of the markup in this file. */
  ul { font-family:'Helvetica',Arial,sans-serif;margin:0px 0px 0px 25px !important;padding:0px !important;color:#2D2D2D;line-height:24px;list-style:disc;font-size:16px; }
  ul &gt; li { font-family:'Helvetica',Arial,sans-serif;margin:10px 0px 0px 0px !important;padding: 0px 0px 0px 0px !important; color: #2D2D2D; list-style:disc; }
  ol { font-family:'Helvetica',Arial,sans-serif;margin: 0px 0px 0px 25px !important;padding:0px !important;color:#2D2D2D;line-height:24px;list-style:decimal;font-size:16px; }
  ol &gt; li { font-family:'Helvetica',Arial,sans-serif;margin:10px 0px 0px 0px !important;padding: 0px 0px 0px 0px !important; color: #2D2D2D; list-style:decimal; }
  /* .e: removes the vertical padding that h3, p and span would otherwise carry
     and sets span and li text to Helvetica 16px on a 24px line. */
  .e h3,
  .e p,
  .e span { padding-bottom:0px;padding-top:0px;mso-margin-top-alt:0px;mso-margin-bottom-alt:0px; }
  .e span,
  .e li { font-family:'Helvetica',Arial,sans-serif;font-size:16px;color:#2D2D2D;line-height:24px; }
  /* .rec: system UI font stack; its button variant lightens on hover. */
  .rec { font-family:  ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji" !important; }
  .rec__button:hover { background-color: #f9fafb !important; }
  /* Links inside .copyright take all text styling from their parent. */
  .copyright a {color: inherit !important; text-decoration: none !important; font-size: inherit !important; font-family: inherit !important; font-weight: inherit !important; line-height: inherit !important;}
  .txt_social p { padding: 0; word-break: break-all; }
  /* Table classes: shared 1px grey border; -c cells are white, -h cells light grey. */
  .table, .table-c, .table-h { border: 1px solid #C0C0C0; }
  .table-c { padding:5px; background-color:#FFFFFF; }
  .table-c p { color: #2D2D2D; font-family:'Helvetica',Arial,sans-serif !important;overflow-wrap: break-word; }
  .table-h { padding:5px; background-color:#F1F1F1; }
  .table-h p { color: #2A2A2A; font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif !important;overflow-wrap: break-word; }
  /* Overrides for viewports up to 667px wide. Every declaration is !important
     so it can replace the inline styles used throughout the markup. */
  @media only screen and (max-width:667px) {
    /* Fluid widths */
    .aa {
      width: 100% !important;
    }
    .bb img {
      width: 100% !important;
      height: auto !important;
      max-width: none !important;
    }

    /* Tighter spacing */
    .cc {
      padding: 0px 8px !important;
    }
    .ee {
      padding-top:10px !important;
      padding-bottom:10px !important;
    }
    .ff ul, .ff ol {
      margin: 0px 0px 0px 10px !important;
      padding: 0px !important;
    }
    .ff li {
      margin:10px 0px 0px 10px !important;
    }

    .r {
      height:140px !important;
    }
    .s p {
      font-size:13px !important;
      line-height:15px !important;
    }

    /* Mobile helper classes */
    .mob-hide {
      display:none !important;
    }
    .mob-stack {
      display:block !important;
      width:100% !important;
    }
    .mob-w-full {
      width:100% !important;
    }
    .mob-block {
      display:block !important;
    }

    .embed-img {
      padding:0px 0px 12px 0px !important;
    }
    .socialShare {
      padding-top:15px !important;
    }
    .rec {
      padding-left:15px!important;
      padding-right:15px!important;
    }
    .bodyWrapper {
      padding:7px 4px 7px 4px !important;
    }
    .social-mobile {
      float:left !important;
      margin-top:10px !important;
    }
  }
  /* Viewports up to 480px wide. */
  @media screen and (max-width: 480px) {
    /* Width is declared twice on purpose: the 100vw value replaces 100%
       wherever the vw unit is understood. */
    u + .a .gg {
      width: 100% !important;
      width: 100vw !important;
    }
    .tok-heart {
      padding-top:75% !important;
    }
    .tok-play {
      padding-top: 250px !important;
    }
  }

  /* Viewports up to 320px wide: smaller top padding for .tok-heart. */
  @media screen and (max-width: 320px) {
    .tok-heart {
      padding-top:65% !important;
    }
  }
  /* .u: small grey-on-white bordered box using the system UI font stack. */
  .u {
    border: 1px solid #CACACA !important;
    border-radius: 2px !important;
    background-color: #ffffff !important;
    padding: 0px 13px 0px 13px !important;
    font-family:ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif !important;
    font-size: 12px !important;
    color: #767676 !important;
  }

  /* The link fills the box and keeps the grey text colour. */
  .u a {
    text-decoration: none;
    display: block !important;
    color: #767676 !important;
    margin: 0px !important;
  }

  /* Inner text and images: same grey, no margin, capped at 32px tall. */
  .u span, .u img {
    color: #767676 !important;
    margin:0px !important;
    max-height:32px !important;
    background-color:#ffffff !important;
  }
&lt;/style&gt;&lt;!--[if mso]&gt;&lt;style type="text/css"&gt;
    /* Rules in this sheet sit inside an [if mso] conditional comment. */
    sup {
      font-size: 100% !important;
      vertical-align: .5em !important;
      mso-text-raise: -1.5% !important;
      line-height: 0 !important;
    }

    /* Lists: no left margin, 10px right margin, 20px above and below. */
    ul {
      margin-left:0px !important;
      margin-right:10px !important;
      margin-top:20px !important;
      margin-bottom:20px !important;
    }
    /* NOTE(review): unordered items carry the same mso-special-format value
       (decimal) as ordered items below; confirm a bullet format was not intended. */
    ul li {
      margin-left: 0px !important;
      mso-special-format: decimal;
    }
    ol {
      margin-left:0px !important;
      margin-right:10px !important;
      margin-top:20px !important;
      margin-bottom:20px !important;
    }
    ol li {
      margin-left: 0px !important;
      mso-special-format: decimal;
    }
    li.listItem {
      margin-left:15px !important;
      margin-top:0px !important;
    }

    .paddingDesktop {
      padding: 10px 0 !important;
    }
    .edm_outlooklist {
      margin-left: -20px !important;
    }
    .embedImage {
      display:none !important;
    }
&lt;/style&gt;&lt;![endif]--&gt;&lt;style&gt;
        /* Open Sans, weight 700, upright. */
        @font-face {
          font-family: 'Open Sans';
          src: url('https://fonts.gstatic.com/s/opensans/v40/memSYaGs126MiZpBA-UvWbX2vVnXBbObj2OVZyOOSr4dVJWUgsg-1x4gaVIUwaEQbjA.woff2') format('woff2');
          font-weight: 700;
          font-style: normal;
          font-display: swap;
        }

        /* Open Sans, weight 700, italic.
           NOTE(review): this src is a fonts.googleapis.com css2 URL, which looks
           like a stylesheet endpoint rather than a .woff2 file, so this face
           probably never loads. Confirm and replace with the direct font file URL. */
        @font-face {
          font-family: 'Open Sans';
          src: url('https://fonts.googleapis.com/css2?family=Open+Sans:ital,wght@1,700&display=swap') format('woff2');
          font-weight: 700;
          font-style: italic;
          font-display: swap;
        }
&lt;/style&gt;&lt;/head&gt;&lt;body class="a" style="margin:0px auto;padding:0px;word-wrap:normal;word-spacing:normal;background-color:#dedede;"&gt;&lt;div role="article" aria-roledescription="email" aria-label="email_name" lang="en" style="font-size:1rem"&gt;&lt;div style="display:none;max-height:0px;overflow:hidden;"&gt; Read about &quot;Rethinking Training Signals in RLVR&quot;, why LLMs are headless chickens, and &quot;Learning to Reason without External Rewards&quot; &#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#
160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204;&#160;&#8204; &lt;/div&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" align="center" cellpadding="0" class="gg"&gt;&lt;tr&gt;&lt;td align="center" valign="top"&gt;&lt;table role="none" width="670" border="0" cellspacing="0" cellpadding="0" class="aa" style="width:670px;table-layout:fixed;"&gt;&lt;tr&gt;&lt;td class="bodyWrapper" align="center" valign="top" style="padding:7px 7px 7px 7px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="border-width:0px 0px 0px 0px;border-style: solid; border-color: #2a2a2a;border-radius:10px 10px 0px 0px;background-color:#ffffff;" class="c"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr id="header"&gt;&lt;td style="padding:28px 28px 0px 28px;"&gt;&lt;div 
style="padding-top:0px;padding-right:0px;padding-bottom:20px;padding-left:0px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td class="f" align="right" valign="top"&gt;&lt;p&gt; June 03, 2025 &nbsp; | &nbsp; &lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxSdB5RCIH6yy1Fm1CYma3EyTT-jsZ1xMzxu6G4_F1i8tF8XRqAYKqOE1dTFvExUN-gmtqaycFFPZy69kr91ELKttmixIFNWpScZ1kpIUCAy3QnL6351aYey0UTJJA_v0fM_JzfGC-dnRrfogthwGn6WtAWLZDGWn-hk_1oQpGnxhY4S-GalO4ls803m0p1UVo_103bxxn5BvqHyT7xYy9uDWf3g0uDYBRha1LX6LHcZ-DaZrSiNChPBzLWzX3pkBw7GDUYnwdJ5tth5AqX1gB-c8bidvLlL_6FTVkdUplsGNt0Q30zu0lzxHKE-GCziGyXVY24mGu8dHBp9_5aU5Nzw0zDyRG29zpHlz91bQqtPPZqvfmrOabmiYe56TdoWQnsZKAOtcZ2VeRZoAyrdnze-2wizTfpzUOYpIAgNutrJ0r-Ix6LELHohohOR3y_KHTh0Cxvdk5co6jGbiZWW0ZMsJh0IxX2ubxPCWwZ5R3AK5d8sIYIxAKXU5USqDYvXaSMWwU8I90SrizokpT4uJOLOXCKvwCGDC1ryoDu54LEh7CeHHLowCOAItSgKfxCx6feyR5wMCb01kAU8k_ffbdRsfdI4e5gDEShe70NqXiUsNXFB6OogaUlPGqqf53v_8Ng0Tjc7fD6On771WI8BA7kcQ1fRSMCjdKZLAZPgdDSL30aNWeMWiM16XusoOyM0b_A/4h1/ta1qJhW0TuGlEE60_6KM6w/h0/h001.FcetaabkwCmcvS1YHP1nNW7UWuTbmAhWXLJU66FT68c"&gt;Read Online&lt;/a&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="center" valign="top" style="padding:15px 0;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td align="center" valign="top"&gt;&lt;h1 style="text-align:left;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;font-weight:Bold;font-size:32px;color:#2A2A2A;padding:2px 0;line-height:38px;"&gt; A Shocking RLVR Revelation For LLM Just Dropped &lt;/h1&gt;&lt;p style="text-align:left;font-family:'Helvetica',Arial,sans-serif;font-weight:normal;font-size:20px;color:#3E3E3E;padding:5px 0;line-height:24px;"&gt; Read about &quot;Rethinking Training Signals in RLVR&quot;, why LLMs are headless chickens, and &quot;Learning to Reason without External 
Rewards&quot; &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td style="height:0px;width:0px;"&gt;&lt;div style="height:1px;" data-open-tracking="true"&gt; &lt;img src="https://elink4f7.mail.bycloud.ai/ss/o/u001.3wmUuY8gEWd4_869a_eXcg/4h1/ta1qJhW0TuGlEE60_6KM6w/ho.gif" alt="" width="1" height="1" border="0" style="height:1px !important;width:1px !important;border-width:0 !important;margin-top:0 !important;margin-bottom:0 !important;margin-right:0 !important;margin-left:0 !important;padding-top:0 !important;padding-bottom:0 !important;padding-right:0 !important;padding-left:0 !important;"/&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr id="content-blocks"&gt;&lt;td class="email-card-body" align="center" valign="top" style="padding-bottom:28px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td id="nov-18-th-nov-24-th-33-latest-ai-re" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h6 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:87.5%;"&gt;&lt;i&gt;May 26th ~ June 2nd&lt;/i&gt;&lt;br&gt;&lt;i&gt;#58 Latest AI Research Explained Simply&lt;/i&gt;&lt;/h6&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""&gt;&lt;tr&gt;&lt;td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="industry-news-in-1-line" class="dd" align="left" valign="top" 
style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"&gt;&lt;h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;"&gt;🗞️ Industry News in 1 Line&lt;/h2&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td style="padding-bottom:12px;padding-left:50px;padding-right:40px;padding-top:12px;" class="ee"&gt;&lt;div style="margin-left:0px;" class="edm_outlooklist"&gt;&lt;ol start="1" style="list-style-type:decimal;margin:0px 0px;padding:0px 0px 0px 0px;"&gt;&lt;li class="listItem ultext"&gt;&lt;p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(255, 58, 58);font-size:0.6rem;"&gt;♥ 9.8k&lt;/span&gt;&lt;/span&gt; DeepSeek released DeepSeek-R1-0528, a version 2 for its R1 model. Around 10~20% performance increase compared to R1v1. It is currently the SoTA open source model, and they also distilled a DeepSeek-R1-0528-Qwen3-8B. You can watch &lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.amatuKKICSickUKplYJXmCmlQmBIg0bN-SAF5BJsdzHP7xW1zEYS3RMn1ZHZnWcWeqTuZAT6jXrhQuuqoswvnJAAWRTmdAQffMTDvo1PYKY8G2cRDdTeW7gtLZKO1m4f/4h1/ta1qJhW0TuGlEE60_6KM6w/h1/h001.1m4s87AB7oql9j8AZqoMgF5SCueJvo228U_J8A8k_Yw" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;my video&lt;/span&gt;&lt;/a&gt; for a brief overview. Weights are now available on &lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.CxDkkVpJsBdVoe83c_tBWoNV4Z0gzLAqdXCVgcyx3wsmEMg5nw7JnMGZ7SvhfwkV1t2PsS8D8YLW9fIth0J3z3NTkKm6Knn_l65wBueiUe-6Zvi_wOX3Eqs7CaMCGBaFpIXyc4H0DnCFVhVBcjYxaA/4h1/ta1qJhW0TuGlEE60_6KM6w/h2/h001.k0iaxwP4fwN7uPA9Pv-_NpaOcMIuyFjRk8wdjyfZM04" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;Huggingface&lt;/span&gt;&lt;/a&gt;. 
&lt;/p&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:480px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/84b61904-b493-4b7f-b47f-b6e231a116db/GsHZfE_aUAEo64N.png?t=1748978442" alt="" height="auto" width="480" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:480px;"&gt;&lt;p&gt;DeepSeek-R1-0528 Benchmarks&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/li&gt;&lt;li class="listItem ultext"&gt;&lt;p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(255, 58, 58);font-size:0.6rem;"&gt;♥ 2.5k&lt;/span&gt;&lt;/span&gt; Black Forest Labs, founded by the key people behind Stable Diffusion, has released FLUX.1 Kontext. Unlike traditional text-to-image models, Kontext understands both text AND images as input, enabling true in-context generation and editing. Currently SoTA in image editing with text. 
&lt;/p&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:450px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/427e5156-afca-4796-a069-5afc7abd913c/GsIi2ydXYAAbfjw.jpg?t=1748978693" alt="" height="auto" width="450" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:450px;"&gt;&lt;p&gt;FLUX.1 Kontext text-based image editing demo&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/li&gt;&lt;li class="listItem ultext"&gt;&lt;p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(255, 58, 58);font-size:0.6rem;"&gt;♥ 4.9k&lt;/span&gt;&lt;/span&gt;&lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j2yo571wOI2ayM48Skcu9lJLxMIzRQF48H5SHUl2j_ayGWWg9D0kezf7jrb1xOiYPoo-3PAWvIe4hMcFxP9z9RK_tF-Qc9rG57Fl1CLdWevyryTuYT9gYH4sxXTsFxSqr3Pj5j_R8W3uLqleQc8REung/4h1/ta1qJhW0TuGlEE60_6KM6w/h3/h001.zm86a5Myb4uPrHNq93bTzEfTqzxuOrmjiNHyZo-7XWk" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt; Anthropic has open-sourced its circuit tracing tools&lt;/span&gt;&lt;/a&gt; for their mechanistic interpretability research. These tools utilize cross-layer transcoders to construct interpretable graphs, allowing for interventions on model features to observe changes in output. The interactive Neuronpedia platform supports visualization and annotation of these graphs, helping studies on behaviors such as multi-step reasoning and hallucination suppression in models like Gemma-2-2B and Llama-3.2-1B. 
&lt;/p&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:600px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/c5350aa2-3e45-425b-b60d-4ec09c4ec507/e370dd79d6246cc1afc45e0b7b872b6d392801cf-3790x1748.jpg?t=1748978931" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:600px;"&gt;&lt;img src="" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:600px;"&gt;&lt;p&gt;An overview of the interactive graph explorer UI on Neuronpedia.&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/li&gt;&lt;/ol&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""&gt;&lt;tr&gt;&lt;td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""&gt;&lt;tr&gt;&lt;td bgcolor="transparent" style="background-color:transparent;border-color:#2C81E5;border-style:solid;border-width:5px;padding:0.0px 0.0px 0.0px 0.0px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" 
cellpadding="0"&gt;&lt;tr&gt;&lt;td class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"&gt;&lt;h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;"&gt;&lt;span style=""&gt;&lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j29rweNtSHTMPdCBNsuu15hStNWn7Cp-_z7KAfyQIe6He4nQT_96lcVXX1db5XGEmA2fwDiqKQU_6H5ubd8zR_R7fdhqA0y5gF6aRHwtvIqQk/4h1/ta1qJhW0TuGlEE60_6KM6w/h4/h001.GTogfZ-shem_RSFq2ygD38AHEiqocrPHJ1oN30z3rc8" target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;findmypapers.ai&lt;/span&gt;&lt;/a&gt;&lt;/span&gt;&lt;span style=""&gt; got a tiny new feature &lt;/span&gt;👀&lt;/h2&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:600px;"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j29rweNtSHTMPdCBNsuu15hStNWn7Cp-_z7KAfyQIe6HeOFOoVeJhHHgfKKiaK6i5-Ic350jZ21CxuRI2MkUHelTFWfhrsXZS1ZYo28G1EYDp/4h1/ta1qJhW0TuGlEE60_6KM6w/h5/h001.ndNWrZlfgPFwqRwyeQzLvqz6crrwvDbqem_fNjNs6fE" rel="noopener noreferrer nofollow" style="text-decoration:none;" target="_blank"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/bfba51a0-ee8e-42d5-8b45-51ddfd5ebe33/image.png?t=1748369227" alt="" height="auto" width="600" style="display:block;width:100%;border-radius:0px 0px 0px 0px;border-style:solid;border-width:0px 0px 0px 0px;box-sizing:border-box;border-color:#E5E7EB;" border="0"/&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"&gt;&lt;p&gt;&lt;span style=""&gt;papers preview, go give it a 
spin!&lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style="color:rgb(34, 34, 34);font-family:Georgia, 'Times New Roman', serif;font-size:16px;"&gt;While we are improving the retrieval quality for finding AI research papers, we still want to make the search experience a bit less boring.&lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style="color:rgb(34, 34, 34);font-family:Georgia, 'Times New Roman', serif;font-size:16px;"&gt;So now, we are able to display the papers that are being searched on! (truly a new tiny feature lol)&lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:600px;"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j29rweNtSHTMPdCBNsuu15hStNWn7Cp-_z7KAfyQIe6Heu4XfgVZCIYSF5UjwZDASpyWFcfhFkP2TFOOwHXWr6iDTPPq42Rx9e09rZOxXYfGf/4h1/ta1qJhW0TuGlEE60_6KM6w/h6/h001.i7sCMZ4i1npgkwcd0tSmtuaekpU9RMcAfzJi1LXhXC0" rel="noopener noreferrer nofollow" style="text-decoration:none;" target="_blank"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/b33452d0-4484-4fe0-aa6b-68f337e064b3/image.png?t=1748369486" alt="" height="auto" width="600" style="display:block;width:100%;border-radius:0px 0px 0px 0px;border-style:solid;border-width:0px 0px 0px 0px;box-sizing:border-box;border-color:#E5E7EB;" 
border="0"/&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"&gt;&lt;p&gt;&lt;span style=""&gt;and a nicer citation section&lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j29rweNtSHTMPdCBNsuu15hStNWn7Cp-_z7KAfyQIe6He9SUNC4nbJ6LlgkqlU9UDXwVB3vJvizYgRJ7hNxQcy0ODHSr63C3LoSd725TYLfvP/4h1/ta1qJhW0TuGlEE60_6KM6w/h7/h001.HMR4vkmtTLebKmXrCQiSnof61ZiqkNZXqB1JSC-tIrw" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"&gt; Check Out FMP &lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style="font-size:0.8rem;font-weight:500;"&gt;&lt;b&gt;&lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.tLfGW26lAwaS9gFg17HSoGymQ3NNPtd5dE5MV_8UgjIDFPVXngz8pvQBldSW42yhUe_Qiq6DgEPMEBuPL9yfRpXelTiuu2kS8pLFvsoem_XoZoy_n13sTKUhZIbl0VH6/4h1/ta1qJhW0TuGlEE60_6KM6w/h8/h001.ZMNCRiwV5jyLkErSKDx7h4z1skTw9mfpHgWHPZvx8Cg" 
target="_blank" rel="noopener noreferrer nofollow"&gt;&lt;span&gt;Advertise with The AI Timeline! &lt;/span&gt;&lt;/a&gt;&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""&gt;&lt;tr&gt;&lt;td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="spurious-rewards-rethinking-trainin" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"&gt;&lt;h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;"&gt;Spurious Rewards: Rethinking Training Signals in RLVR&lt;/h2&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style=""&gt;&lt;i&gt;Shao et al. 
[University of Washington, Allen Institute for Artificial Intelligence, University of California]&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(255, 58, 58);font-size:0.6rem;"&gt; ♥ 1.7k &lt;/span&gt;&lt;/span&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; &lt;/span&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; LLM RLVR &lt;/span&gt;&lt;/span&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; &lt;/span&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; bycloud’s pick &lt;/span&gt;&lt;/span&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; &lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="reinforcement-learnings-surprising-" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;&lt;span style="color:rgb(67, 67, 67);"&gt;Reinforcement Learning&#39;s Surprising Math Boost&lt;/span&gt;&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style="font-weight:700;"&gt;&lt;b&gt;Reinforcement learning with verifiable rewards (RLVR)&lt;/b&gt;&lt;/span&gt; has become the default method for improving mathematical reasoning in language models. But what if the rewards guiding this learning are completely disconnected from actual correctness?  
&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; This study tells us that certain models improve dramatically even when trained on random or deliberately incorrect rewards. This discovery challenges assumptions about how RLVR works and highlights critical differences in model pretraining. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:600px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/281b7147-a77c-47e8-93a1-16677e994a26/image.png?t=1748967797" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="the-mechanism-behind-reward-resilie" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;&lt;span style="color:rgb(67, 67, 67);"&gt;The Mechanism Behind Reward Resilience&lt;/span&gt;&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; The researchers of this study tested the RLVR method across multiple reward types on mathematical benchmarks like MATH-500. For Qwen models, rewards ranged from ground-truth labels to &quot;spurious&quot; signals like random binary assignments or incentives for incorrect answers. 
Surprisingly, all rewards (even those with zero correlation to correctness) gave significant accuracy gains. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; After analyzing this behavior, the researchers found that Qwen models frequently use &quot;code reasoning&quot;, generating Python-like pseudocode to structure solutions without executing it. Before training, this appeared in 66.7% of responses. After RLVR with &lt;span style=""&gt;&lt;i&gt;any&lt;/i&gt;&lt;/span&gt; reward, it surged past 90%. This shift correlated strongly with performance improvements, suggesting RLVR amplifies pretrained capabilities rather than imparting new knowledge. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; For non-Qwen models like Llama or OLMo, spurious rewards failed. These models lack Qwen’s pretrained affinity for code reasoning. When they attempted code generation, accuracy often dropped.  
&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:600px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/00bbc41e-cf72-4e85-b807-52650008a2a5/image.png?t=1748967829" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="performance-gains-and-implications" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;&lt;span style="color:rgb(67, 67, 67);"&gt;Performance Gains and Implications&lt;/span&gt;&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; The tests on RLVR approach with Qwen2.5-Math-7B achieved 21-26% accuracy gains on MATH-500 using spurious rewards (random, format-based, or incorrect labels) which is quite close to the 28.8% gain from ground-truth rewards. However, Llama and OLMo models saw minimal or negative changes with identical rewards. For example, OLMo2-7B only improved with ground-truth supervision. 
&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:600px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/a02db2e4-43dc-4012-94d6-04b0c781409a/image.png?t=1748967937" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; Forcing Qwen models to generate code via prompts improved accuracy by 11-25%, while suppressing it reduced gains. However, when non-Qwen models were prompted for code, they performed worse. This confirms code reasoning as a primary driver behind Qwen’s reward-agnostic improvements, and that a number of RLVR studies may need to be re-evaluated as many of them &lt;b&gt;only validated their performance on Qwen model families, thus their results may fail to generalize&lt;/b&gt;. 
&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.VomAAYwkCjux8i_FMc4kJT_8c5vMMgO6TRThUcmLGl8sP0qnfPKUbqfIydWk4fTjc6_tY54WbqEnOg74OoXbuOkEkAbKQhB01U28S3wx0ERGJB6nDKDbbc05Kyy8MFzaYsoB9X66Y5s85lyc_SfOpYGZmS_5xVxYCwyDtEvu21Xud0xYTNOWm0hrTiQ-5maX/4h1/ta1qJhW0TuGlEE60_6KM6w/h9/h001.X4aOrFqLEWbg0QPFR3E4dLk4OYKgaQg9uJl4YW-vLow" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"&gt; Read Full Paper &lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""&gt;&lt;tr&gt;&lt;td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="reasoning-ll-ms-are-wandering-solut" class="dd" align="left" valign="top" 
style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"&gt;&lt;h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;"&gt;Reasoning LLMs are Wandering Solution Explorers&lt;/h2&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style=""&gt;&lt;i&gt;Lu et al. [NUS AI Institute]&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(255, 58, 58);font-size:0.6rem;"&gt; ♥ 361 &lt;/span&gt;&lt;/span&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; &lt;/span&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; LLM Reasoning Search &lt;/span&gt;&lt;/span&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; &lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="introduction-to-the-reasoning-ll-ms" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;&lt;span style="color:rgb(67, 67, 67);"&gt;Introduction to the Reasoning LLMs&lt;/span&gt;&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; LLMs don’t have very good reasoning skills but they try to keep up by using techniques like chain-of-thought prompting to tackle complex problems. But under the hood, these reasoning LLMs (RLLMs) often wander aimlessly through solution spaces rather than exploring them systematically. 
&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; This paper shows that as problems grow more complex, RLLMs stumble and produce invalid steps, redundant explorations, or unfaithful conclusions. Current benchmarks might hide these problems on simpler tasks, but as the tasks get more complex, the performance of models suffers severely.  &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:600px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/e883aa9d-c274-40f9-a56a-6a34072efc3d/image.png?t=1748967990" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="why-transformers-suck-at-systematic" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;&lt;span style="color:rgb(67, 67, 67);"&gt;Why Transformers Suck at Systematic Exploration&lt;/span&gt;&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; Systematic exploration requires three properties:  &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td style="padding-bottom:12px;padding-left:50px;padding-right:40px;padding-top:12px;" class="ee"&gt;&lt;div style="margin-left:0px;" class="edm_outlooklist"&gt;&lt;ul 
style="font-weight:normal;list-style-type:disc;margin-bottom:12px !important;margin-top:12px !important;padding:0px 0px 0px 0px;"&gt;&lt;li class="listItem ultext"&gt;&lt;p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"&gt;&lt;span style="font-weight:700;"&gt;&lt;b&gt;Validity&lt;/b&gt;&lt;/span&gt; means each reasoning step follows the problem’s rules, like staying within bounds during a grid search. &lt;/p&gt;&lt;/li&gt;&lt;li class="listItem ultext"&gt;&lt;p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"&gt;&lt;span style="font-weight:700;"&gt;&lt;b&gt;Effectiveness&lt;/b&gt;&lt;/span&gt; ensures the model reaches a valid solution. &lt;/p&gt;&lt;/li&gt;&lt;li class="listItem ultext"&gt;&lt;p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"&gt;&lt;span style="font-weight:700;"&gt;&lt;b&gt;Necessity&lt;/b&gt;&lt;/span&gt; means no wasted steps, i.e., every action must directly contribute to solving the problem or ruling out dead ends. &lt;/p&gt;&lt;/li&gt;&lt;/ul&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; For instance, in depth-first search tasks, a systematic explorer would backtrack correctly after hitting dead ends, avoiding redundant paths. Current RLLMs violate these principles repeatedly. They commit boundary violations, like hallucinating array indices beyond actual limits. Procedure omission occurs when models skip essential substeps, halting prematurely. Incorrect backtracking leads them to restore outdated states, corrupting the search. 
&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:600px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/f33e047d-ed50-419a-892e-a666cb108af9/image.png?t=1748968016" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; Unnecessary explorations can lead to state revisitation or infinite loops which repeat failed approaches until resources are exhausted. Evaluation errors exacerbate these issues, such as using stale data in dynamic programming or miscomputing values mid-reasoning. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; These failures are mainly caused by architectural gaps. Transformers lack built-in mechanisms for state tracking or structured backtracking. Without explicit memory management or loop-exit heuristics, RLLMs rely on local context, overlooking global constraints. For example, in permutation tasks, smaller models degrade faster, but even top-tier systems like Anthropic-Sonnet-3.7 eventually falter as complexity mounts. 
&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="evaluating-transformers-on-complex-" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;&lt;span style="color:rgb(67, 67, 67);"&gt;Evaluating Transformers on Complex Problems&lt;/span&gt;&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; The researchers conducted experiments on tasks like permutation generation and all tested RLLMs show exponential performance decay as problem size increases. In one test, models created permutations of lists with duplicates. Additionally, the ratio of valid solutions found plummeted for larger inputs across all models, including commercial giants like OpenAI-O3. Smaller open-source models deteriorated fastest, but none of them achieved systematicity.  &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:600px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/9a1df915-c0e7-43f6-b429-e644b6175d0e/image.png?t=1748968047" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; The findings signal three urgent shifts for AI research. 
&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td style="padding-bottom:12px;padding-left:50px;padding-right:40px;padding-top:12px;" class="ee"&gt;&lt;div style="margin-left:0px;" class="edm_outlooklist"&gt;&lt;ol start="1" style="list-style-type:decimal;margin:0px 0px;padding:0px 0px 0px 0px;"&gt;&lt;li class="listItem ultext"&gt;&lt;p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"&gt; First, architectures need components like symbolic modules or search controllers to enforce structured exploration. &lt;/p&gt;&lt;/li&gt;&lt;li class="listItem ultext"&gt;&lt;p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"&gt; Second, training must prioritize process supervision, rewarding valid step-by-step reasoning over final-output mimicry. &lt;/p&gt;&lt;/li&gt;&lt;li class="listItem ultext"&gt;&lt;p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"&gt; Third, evaluation should evolve beyond accuracy metrics to audit reasoning traces for validity and efficiency. 
&lt;/p&gt;&lt;/li&gt;&lt;/ol&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28yjf9KIXZdsXoh1WlHvvKmOAjB7YJ03x7alXxVCrccEcD57_ieB5KqqXVBJ6j-uHVoEWgo95I0cBcPEyxXVsbrOzKSH8AQJZfXwHMBBvXm5rNILfJeKUY3VSrNpj2DtVQ/4h1/ta1qJhW0TuGlEE60_6KM6w/h10/h001.gVQFgiSujETptj78eHnX5ea6t6ivUBnexJX4eNfml8g" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"&gt; Read Full Paper &lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""&gt;&lt;tr&gt;&lt;td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="learning-to-reason-without-external" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 
28px;text-align:left;"&gt;&lt;h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;"&gt;Learning to Reason without External Rewards&lt;/h2&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style=""&gt;&lt;i&gt;Zhao et al. [UC Berkeley, Yale University]&lt;/i&gt;&lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(255, 58, 58);font-size:0.6rem;"&gt; ♥ 3.4k &lt;/span&gt;&lt;/span&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; &lt;/span&gt;&lt;span style="background-color:#e0e0e0;"&gt;&lt;span style="color:rgb(44, 129, 229);font-size:0.6rem;"&gt; LLM Reasoning &lt;/span&gt;&lt;/span&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="introduction-to-reasoning-in-ll-ms" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;&lt;span style="color:rgb(67, 67, 67);"&gt;Introduction to Reasoning in LLMs&lt;/span&gt;&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; Training large language models for complex reasoning often requires reinforcement learning with verifiable rewards (RLVR), which requires costly domain-specific supervision like gold-standard solutions or test cases. This limitation makes it hard to generalize this approach to broader applications.  
&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; This paper introduces Reinforcement Learning from Internal Feedback (RLIF) which is a new approach that enables models to learn from intrinsic signals without external oversight. The researchers have developed the INTUITOR method, an RLIF implementation that leverages a model&#39;s self-certainty as the sole reward signal. This approach eliminates the need for labeled data or verifiers, and offers a scalable path for autonomous AI systems where traditional rewards are impractical. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:600px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/c3a67eb4-2088-487f-99b4-16ecc6c94424/image.png?t=1748968145" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="working-mechanism-of-intuitor-frame" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;&lt;span style="color:rgb(67, 67, 67);"&gt;Working Mechanism of INTUITOR Framework&lt;/span&gt;&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; INTUITOR uses the Group Relative Policy Optimization (GRPO), a reinforcement learning framework. 
Instead of using external rewards, it substitutes them with self-certainty scores derived from the model’s token-level predictions. Self-certainty is calculated as the average KL divergence between the model’s output distribution and a uniform distribution over the vocabulary. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:600px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/2c5a6f02-3a48-4846-8b29-93f2547440a8/image.png?t=1748968167" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; Higher values of this divergence indicate greater confidence in generated tokens. During training, multiple candidate responses are sampled for each query. Their self-certainty scores are normalized to compute advantages, guiding policy updates toward high-confidence outputs. This process creates a self-reinforcing loop: the model iteratively refines responses to maximize its own confidence. For example, when encountering uncertain reasoning chains, INTUITOR encourages detailed step-by-step explanations until confidence rises. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; The KL divergence penalty acts as a regularizer and prevents excessive deviation from the reference model. 
Additionally, self-certainty operates token-by-token, which makes it inherently process-aware rather than outcome-focused. This continuous signal helps the model build internal coherence without external validation. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:600px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/ca7d8749-77f9-411d-b8a4-f1543e05e52e/image.png?t=1748968218" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td id="benchmark-performance-of-intuitor-f" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"&gt;&lt;h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"&gt;&lt;span style="color:rgb(67, 67, 67);"&gt;Benchmark Performance of INTUITOR Framework&lt;/span&gt;&lt;/h3&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; The researchers conducted experiments with Qwen2.5 models (1.5B and 3B parameters) trained on mathematical datasets that show INTUITOR matches GRPO’s in-domain performance. On MATH500 and GSM8K benchmarks, accuracy differences were barely noticeable. For instance, Qwen2.5-3B scored 79.2% (INTUITOR) versus 82.6% (GRPO) on GSM8K. Additionally, INTUITOR excels in generalization: it achieved a 65% relative improvement on LiveCodeBench code generation and a 76% gain on CRUXEval-O, outperforming GRPO’s 44%.  
&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:600px;"&gt;&lt;img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/db64b173-a95e-4ab3-9a0c-61bd427be264/image.png?t=1748968239" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; The online self-certainty method computes rewards using the evolving policy. This prevented reward hacking, unlike static offline versions. Tests also highlighted the KL penalty’s role in balancing generalization, especially for out-of-domain tasks. 
&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28yjf9KIXZdsXoh1WlHvvKlx4sSXbazLI4P5C6vXSFq5nE6fypHrgZACoweWpW6wyFTo-ASfNAhymNgDBZgiQOIkKlY21dtYrKHYoj7ucBBWkMyH03-1NGNtLFQaNJwRyQ/4h1/ta1qJhW0TuGlEE60_6KM6w/h11/h001.ccQ61bOQVBuqtwRPGCJj4Th9AP273QcVwagTSQnjSYg" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"&gt; Read Full Paper &lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="font-size:0px;line-height:0px;padding:30px 0px 30px;" class="dd"&gt;&lt;table class="j" role="none" width="50%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td&gt; &nbsp; &lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"&gt;&lt;h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;"&gt;Share The AI Timeline&lt;/h2&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td 
class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; You currently have &lt;strong&gt;0&lt;/strong&gt; referrals. &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="left" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; display:none;width:0px;max-height:0px;overflow:hidden;mso-hide:all;height:0;font-size:0;max-height:0;line-height:0;margin:0 auto;" class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 0;"&gt;&lt;tr&gt;&lt;td align="center" valign="top" style="width:300px;"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxWc4htTObwdorovK0nFHVH-4pUdVE0ELYH5DsNemk732SjNwhPNJ25r0O8B5vYifsGNUqyW5TiZkyMsF1yreu0byy2KW36J1wDdpoLuXg2TU1F1OW8OHoHaU4-ZmrZpPU4RN-crQCEimD190CSn9fPuxpIRojBJyu1VfV5KtQD3QMVdSg2JrjEj5-xm4r4E12Whf08itqPCb9Q5W0X4rt3ubYkqCmWnLeZpmb3_RZcbIk0UE5wZnFLCQJHLFs0qZ0OGpXp89o1HU4mWIBur5Or4tQGm5M_Y8m5PvTEfYfxLRyrcRv7GyVs5oLtFfiySZ2SqtZypLA-h50h61p0uPiA7iA_PiMqlVLtM-87XL33VZi05_O3UTpWE_0nAzFRJ4TW1ayz3_vn4Zlp9IERdbnnAd_1kPLD4lAQcR5PRXgtpCGWJ3w5KwfGbfMtHatGAawt_XeXAIZ7z3j_V28rhFQptVf28omNbMsgTFCAq8FBHHRfMyXqTd3Cv2KTufRaCKT7E6iqFK1L559EvOxHdkBn8i5U1b5w4ZzFUvkCynQfYqy3MoBoQiC2TOlkEIzgp-6DiKDuucRqb7RkFe5EBnhuEr3AYidmggXujT1iTgLRnx/4h1/ta1qJhW0TuGlEE60_6KM6w/h12/h001.lY0rVAm-7U5yDhPVI-S8ibB5RRrRPpvR3KYBmLvPijY" rel="noopener noreferrer nofollow" style="text-decoration:none;" target="_blank"&gt;&lt;img src="" alt="" height="auto" width="300" style="display:block;width:100%;" border="0"/&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="left" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:left;width:100%;word-break:break-word;" class="dd"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" align="left" style="margin:14px auto 14px 
auto;"&gt;&lt;tr&gt;&lt;td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxWc4htTObwdorovK0nFHVH-4pUdVE0ELYH5DsNemk732SjNwhPNJ25r0O8B5vYifsGNUqyW5TiZkyMsF1yreu0byy2KW36J1wDdpoLuXg2TU1F1OW8OHoHaU4-ZmrZpPU4RN-crQCEimD190CSn9fPuxpIRojBJyu1VfV5KtQD3QMVdSg2JrjEj5-xm4r4E12Whf08itqPCb9Q5W0X4rt3ubYkqCmWnLeZpmb3_RZcbIk0UE5wZnFLCQJHLFs0qZ0OGpXp89o1HU4mWIBur5Or4tQGm5M_Y8m5PvTEfYfxLRyrcRv7GyVs5oLtFfiySZ2SqtZypLA-h50h61p0uPiA7iA_PiMqlVLtM-87XL33VZi05_O3UTpWE_0nAzFRJ4TW1ayz3_vn4Zlp9IERdbnnAd_1kPLD4lAQcR5PRXgtpCGWJ3w5KwfGbfMtHatGAawt_XeXAIZ7z3j_V28rhFQptVf28omNbMsgTFCAq8FBHHRfMyXqTd3Cv2KTufRaCKT7E6iqFK1L559EvOxHdkBn8i5U1b5w4ZzFUvkCynQfYqy3MoBoQiC2TOlkEIzgp-6DiKDuucRqb7RkFe5EBnhuEr3AYidmggXujT1iTgLRnx/4h1/ta1qJhW0TuGlEE60_6KM6w/h13/h001.m_1VaLeh7filnA0btN2xbDxr7XCHjLeYHl_vNdpLmI8" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"&gt; Click to Share &lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" align="left" style="padding:0px 28px 0px 28px;text-align:left;word-break:break-word;"&gt;&lt;p style="mso-line-height-alt:150.0%;"&gt; Or copy and paste this link to others: &lt;a class="link" href="https://mail.bycloud.ai/subscribe?ref=6SqUHb8KiF&_bhlid=bf7a73b936aab597b0df9777ef50b28c5a049d32" target="_blank" rel="noopener noreferrer nofollow" clicktracking="off"&gt;&lt;span&gt;https://mail.bycloud.ai/subscribe?ref=6SqUHb8KiF&lt;/span&gt;&lt;/a&gt;&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top" 
style="font-size:0px;line-height:0px;padding:30px 0px 30px;" class="dd"&gt;&lt;table class="j" role="none" width="50%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td&gt; &nbsp; &lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="dd" style="padding: 20px;"&gt;&lt;table width="100%" cellpadding="0" cellspacing="0" role="none" style="max-width:520px;margin:0 auto;"&gt;&lt;tr&gt;&lt;td class="q" style="padding:16px 16px 6px 16px;"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.tLfGW26lAwaS9gFg17HSoDDFT6eh5Nsg0xYVQj-h6I3o9m2k79_qw4izMYhmcI36q8ZwFCKttNCHBF4s86UrmwPLRiqQI99i1Tucph32rn40yYa65fQIJK30KucHIIsKu8OuBe34EzLkTiN1OqWZOosFUsvqQiUQ70A5VAaZGuQ/4h1/ta1qJhW0TuGlEE60_6KM6w/h14/h001.uYE4R-N50YwLS0JoK7eCW9WX3NceYBpxk2DokuQC6kg" style="text-decoration:none !important;"&gt;&lt;table width="100%" cellpadding="0" cellspacing="0" border="0" role="none"&gt;&lt;tr&gt;&lt;td width="100%" style="padding: 0 0 14px 0;text-decoration:none;width:100%;"&gt;&lt;table width="100%" cellpadding="0" cellspacing="0" border="0" role="none"&gt;&lt;tr&gt;&lt;td width="36" style="width:36px;"&gt;&lt;img src="https://pbs.twimg.com/profile_images/1698572487909400576/BvncwnrP_normal.jpg" alt="tw profile: The AI Timeline" style="display:block;width:36px;height:36px;border-radius:50%;border:0;"/&gt;&lt;/td&gt;&lt;td width="400" style="padding:0 0 0 8px;text-decoration:none;"&gt;&lt;span style="display:block;font-size:14px;color:#1c2022;font-weight:700;"&gt; The AI Timeline &lt;/span&gt;&lt;span style="display:block;color:#697882;font-size:14px;"&gt; @TheAITimeline &lt;/span&gt;&lt;/td&gt;&lt;td width="24" align="right" style="vertical-align:text-top;"&gt;&lt;img width="24" height="24" loading="lazy" alt="tw" style="border:0;" 
src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/x_logo.png"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td style="word-break:break-word;"&gt;&lt;p&gt;🚨This week's top AI/ML research papers:&lt;/p&gt;&lt;p&gt;- Spurious Rewards &lt;br&gt;- FLUX.1 Kontext &lt;br&gt;- Learning to Reason without External Rewards &lt;br&gt;- Reasoning LLMs are Wandering Solution Explorers &lt;br&gt;- VLM-3R &lt;br&gt;- Silence is Not Consensus &lt;br&gt;- Beyond Markovian &lt;br&gt;- The Entropy Mechanism of RL for Reasoning LMs &lt;br&gt;-&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td style="padding:12px 0 0 0;"&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" style="padding:8px 0 0 0;width:480px;"&gt;&lt;img src="https://pbs.twimg.com/media/GsYRiC3WwAA5QJp.jpg" width="480" height="auto" style="display:block;border:1px solid #E1E8ED;border-radius:5px;width:100%;max-width:480px;height:auto;"/&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td height="8" style="line-height:1px;font-size:1px;height:8px;"&gt;&nbsp;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="left" valign="top" class="s"&gt;&lt;p&gt;6:57 PM • Jun 1, 2025&lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td height="10" style="line-height: 1px; font-size: 1px; height: 10px;"&gt;&nbsp;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td height="1" bgcolor="#e1e8ed" style="line-height:0px;font-size:0px;height:1px;"&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td height="10" style="line-height:1px;font-size:1px;height:10px;"&gt;&nbsp;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="left" valign="top" class="s"&gt;&lt;p&gt;&lt;b style="color:#1C2022"&gt;681&lt;/b&gt; Likes &nbsp; &lt;b style="color:#1C2022"&gt;69&lt;/b&gt; Retweets &nbsp; &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="left" valign="top" class="s"&gt;&lt;div align="center" style="text-align:center;margin-top:4px;margin-bottom:4px;padding:8px;border:1px solid 
#ccd6dd;border-radius:9999px;color:#1B95E0"&gt;&lt;b&gt;7 Replies&lt;/b&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center" valign="top"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td&gt;&lt;tr&gt;&lt;td class="b" align="center" valign="top" bgcolor="#2a2a2a" style="padding:0px 0px 0px 0px;border-style:solid;border-width: 0px 0px 0px 0px;border-color: #2a2a2a;border-bottom-left-radius:10px;border-bottom-right-radius:10px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td align="center" valign="top" bgcolor="#73ddff" style="padding:12px"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td&gt;&lt;span style="padding-left:1px;"&gt;&lt;/span&gt;&lt;/td&gt;&lt;td align="center" valign="middle" width="75" style="width:75px;"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.1muhFWIqieRYpaJ-FbWSCQqcWoV4NNHHr5SkP9THApWuHAAlWLQxI3Q_IqFmt_DcyAxeC8jDApCnHmMSBGpBb5sgtimvBYgxRX-Rp7s0F3LjCHoSwdhr83OBqRFhJ1y_/4h1/ta1qJhW0TuGlEE60_6KM6w/h15/h001.FDevNlA8AnK0IQx5_cjZDUoyNa2_Ib6A_ay_nZpYVp0" style="text-decoration:none;"&gt;&lt;img width="22" height="22" alt="tw" border="0" style="display:block;max-width:22px;color:Dark" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/x_dark.png"/&gt;&lt;/a&gt;&lt;/td&gt;&lt;td align="center" valign="middle" width="75" style="width:75px;"&gt;&lt;a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.amatuKKICSickUKplYJXmBoQnQ9VXnB2zTxBG4HeHBgjMqVxpoXRdj01cjwyoVlHgiebEOgBvwHtevoVpsSvpn3Q1di2ml6sb3cBM-X6IStQbj_zQSVGWJ8AAmPw2en2/4h1/ta1qJhW0TuGlEE60_6KM6w/h16/h001.i4uu6o5J6OrZ5rZpAvJsgiuUL-hzlnVxGI_yYFein6o" 
style="text-decoration:none;"&gt;&lt;img width="22" height="16" alt="yt" border="0" style="display:block;max-width:22px;color:Dark" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/youtube_dark.png"/&gt;&lt;/a&gt;&lt;/td&gt;&lt;td&gt;&lt;span style="padding-left:1px;"&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td height="10" style="line-height:1px;font-size:1px;height:10px;"&gt; &nbsp; &lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="w" align="center" valign="top" style="padding:15px 15px 15px 15px;"&gt;&lt;table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"&gt;&lt;tr&gt;&lt;td align="center" valign="top"&gt;&lt;p style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"&gt; Update your email preferences or unsubscribe &lt;a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxWc4htTObwdorovK0nFHVH-4pUdVE0ELYH5DsNemk732SjNwhPNJ25r0O8B5vYifsBhEpz-DJgyVFmavJPa0OyKRRnvw4o7XGyvIv7PRofnmnP3rR87lMjQTvIf2D8fDkrlOeH-UR58QKHW1S0Qojh6Wxle7a89qZt_YqiTekZylUSewzWcHkTJOHebQP8hk_tYN6L0h0gdAab5OKd3g0aIjSuwDsbZiETwGtbYL_wXCqQ6vrVpGZn0qxDaaqO94IGMobux-nBZ8yKPYmayNHCLuqt01VSKMhTm4ktcNaKUdactzm2KtyW-yBAchtSKdIyqGsV5bkMjC4toLBBcY5V8jofxAWo08yNkFiJkqMQwQTLAbV6LcSJmtXzrac2aC6PD6B1qyfVEiE_OKIzOMWM6_3zTTqLZM3yc6wetPKZYPv7WuDVlNJebJ_bW3SJx14XsmFFNrDXtgKDEn_fkRYdOk4Unl-4V8L8CRYJYSndpTJlBGbuZ3AwufedRCZK03BhRkTTeCKWSNwtQ9bIYdxUgZii_XarzVBYRerIwVGIimrOluj4H6w-QUJ24UkMchjidFA7Cg6y9iqh6wMKsOXPX7u8kQzDWL9pLQfHGkgqCKId4hsVxCwlDFyYpS-IyMN3tmJiF9vq0L3Kj36XSzihxw1kWZaxn2oiQ7z1MKD7qoaQcdf2lyh-F-l05uOQjM22nBi3PsYe-Akp3RsXf1sZxspwavYi_zNmBLDjaSQk4CCkwJzqGmdnXWsrxS38PmPMPfUUx_7fpy7hWm56IQC8tgdB93dDKli5Vnp1qtQZ-KmV7W9RzJms9uWuYHnNXhCA/4h1/ta1qJhW0TuGlEE60_6KM6w/h17/h001._MaHy6pqbGNCrW7WhjaQCm66VPCNHu7iRhQolbzaN_A" style="text-decoration:underline;text-decoration-color:#FFFFFF!important;color:#FFFFFF!important;"&gt; 
here&lt;/a&gt;&lt;/p&gt;&lt;p class="copyright" style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"&gt; &copy; 2025 bycloudai &lt;/p&gt;&lt;p style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"&gt; 228 Park Ave S, #29976, New York, New York 10003, United States &lt;/p&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr style="display: table-row !important;"&gt;&lt;td align="center" valign="top" style="padding-top:20px;display:table-cell !important;"&gt;&lt;table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="display:table !important;"&gt;&lt;tr style="display:table-row !important;"&gt;&lt;td class="u" align="center" valign="middle" height="32" style="height:32px;display:table-cell !important; max-height: 32px !important;margin:0px !important; background-color: #ffffff !important;"&gt;&lt;a style="line-height:32px !important;text-decoration:none;display:block !important;" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28olDWFpV5DDKfdk_OdOKOgzQScRXqK49nG7X8IbImKu6VlPACp2Qug-aeDsbqWwolhrOc9lG9ROoGsxRzs7OUEirCM89ctC9hRw9CyuG4Tf50-HrJlmuOx8avGqcRNhPwb_QAuzDdsLNrdYOsOdp1hlzTVuDc-ZK04W9LeWeVBlFqwVAExDJe0DitBrl7XQdeWvO8fF__40FlbWUm7BH9YEIpcBUbViHjWqfC7ANsaF/4h1/ta1qJhW0TuGlEE60_6KM6w/h18/h001.cO3_1CwHTeOACVkTJAY-oLlzHAoZ8HPYpQVTxZZaxY0"&gt;&lt;img src="https://media.beehiiv.com/output-onlinepngtools.png" width="16" alt="beehiiv logo" style="display:inline-block !important;max-width:16px !important; vertical-align:-3px !important;width: 16px !important;" border="0"/&gt;&lt;span style="padding-left:11px !important;display: inline-block !important;"&gt;Powered by beehiiv&lt;/span&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="left" valign="top" height="2" style="height:2px;"&gt;&lt;a 
href='https://elink4f7.mail.bycloud.ai/ss/c/u001.CxDkkVpJsBdVoe83c_tBWsHIaP4XNp0WgUYqLvHcKk_3uqk_KIkz4ddLinhFbud6JuxLFdSUhYnR7b1NSsmbtzXNGNblnEEMKUtkCAjkn8Y/4h1/ta1qJhW0TuGlEE60_6KM6w/h19/h001.yYFZI3J_NyluQ4ODGDOIdklFYly2-93auISls3HyUHQ' style="color: #2a2a2a !important; cursor: default; font-size: 1px; text-decoration: none;"&gt; Terms of Service &lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/div&gt;&lt;/body&gt;&lt;/html&gt;
Email Content