<!DOCTYPE html><html lang="en" xmlns="http://www.w3.org/1999/xhtml" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" style="font-size:16px;"><head></head><head><meta charset="utf-8"/><!--[if !mso]><!--><meta http-equiv="X-UA-Compatible" content="IE=edge"/><!--<![endif]--><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="x-apple-disable-message-reformatting"/><meta name="format-detection" content="telephone=no,address=no,email=no,date=no,url=no"/><meta name="color-scheme" content="light"/><meta name="supported-color-schemes" content="light"/><title>The Leaderboard Illusion</title><!--[if mso]><xml><o:OfficeDocumentSettings><o:AllowPNG/><o:PixelsPerInch>96</o:PixelsPerInch></o:OfficeDocumentSettings></xml><![endif]--><style> :root { color-scheme: light; supported-color-schemes: light; } body { margin: 0; padding: 0; min-width: 100%!important; -ms-text-size-adjust: 100% !important; -webkit-transform: scale(1) !important; -webkit-text-size-adjust: 100% !important; -webkit-font-smoothing: antialiased !important; } .body { word-wrap: normal; word-spacing:normal; } table.mso { width: 100%; border-collapse: collapse; padding: 0; table-layout: fixed; } img { border: 0; outline: none; } table { mso-table-lspace: 0px; mso-table-rspace: 0px; } td, a, span { mso-line-height-rule: exactly; } #root [x-apple-data-detectors=true], a[x-apple-data-detectors=true], #MessageViewBody a { color: inherit !important; text-decoration: inherit !important; font-size: inherit !important; font-family: inherit !important; font-weight: inherit !important; line-height: inherit !important; } span.MsoHyperlink { color: inherit !important; mso-style-priority: 99 !important; } span.MsoHyperlinkFollowed { color: inherit !important; mso-style-priority: 99 !important; } .a { background-color:#dedede; } .b { background-color:#2a2a2a; } .c { background-color:#ffffff; } .d { background-color:#fff0c8; } .d2 { background-color:#FFFFFF; } 
.d3 { background-color:#FFFFFF; } h1 a { text-decoration:none;color:#2C81E5;font-style:italic; } h2 a { text-decoration:none;color:#2C81E5;font-style:italic; } h3 a { text-decoration:none;color:#2C81E5;font-style:italic; } h4 a { text-decoration:none;color:#2C81E5;font-style:italic; } h5 a { text-decoration:none;color:#2C81E5;font-style:italic; } h6 a { text-decoration:none;color:#2C81E5;font-style:italic; } h1, h1 a, h2, h2 a, h3, h3 a, h4, h4 a, h5, h5 a, h6, h6 a, ul, li, ol, p, p a { margin: 0;padding: 0; } h1 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:700;font-size:28px;color:#2A2A2A;line-height:42px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h2 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:700;font-size:24px;color:#2A2A2A;line-height:36px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h3 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:20px;color:#2A2A2A;line-height:30px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h4 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:18px;color:#2A2A2A;line-height:27px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h5 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:16px;color:#2A2A2A;line-height:24px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h6 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:14px;color:#2A2A2A;line-height:21px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } p { font-family:'Georgia','Times New 
Roman',serif;font-weight:400;color:#2D2D2D;font-size:16px;line-height:24px;padding-bottom:8px;padding-top:8px;mso-margin-top-alt:8px;mso-margin-bottom-alt:8px; } p a, .e a, ul a, li a, .h a, .h2 a, .h3 a { word-break:break-word;color:#2C81E5 !important;text-decoration:none;font-style:italic; } p a span, .e a span, ul a span, li a span { color: inherit } p .bold { font-weight:bold;color:#2D2D2D; } p span[style*="font-size"] { line-height: 1.6; } .f p { font-size:12px;line-height:15px;color:#2D2D2D;padding:0; } .f p a { color:#2D2D2D !important; } .g p { font-family:'Helvetica',Arial,sans-serif;font-size:14px;line-height:20px;font-weight:normal;margin:0; } .g p a { text-decoration: underline; } .i p { font-family:'Helvetica',Arial,sans-serif;line-height:23px;font-size:15px;color:#2D2D2D; } .i p a { color:#2D2D2D !important; } .i2 p { font-family:'Helvetica',Arial,sans-serif;line-height:23px;font-size:15px;color:#2D2D2D; } .i2 p a { color:#2D2D2D !important; } .i3 p { font-family:'Helvetica',Arial,sans-serif;line-height:43px;font-size:24px;color:#2D2D2D; } .i3 p a { color:#2D2D2D !important; } .h p a { color:#595959 !important; } .h2 p a { color:#595959 !important; } .h3 p a { color:#595959 !important; } .f p a, .i p a, .i2 p a, .i3 p a, .h p a, .h2 p a, .h3 p a { text-decoration:underline; } .j { border-top:3px solid #ffeb2d; } .k p { padding-left:15px;padding-bottom:0px;padding-top:6px;mso-margin-top-alt:6px;mso-margin-bottom-alt:0px;mso-margin-left-alt:15px; } .o { background-color:#FFFFFF;border:1px solid #F1F1F1;border-radius:5px; } .o p { font-family:'Helvetica',Arial,sans-serif;padding:0px;margin:0px; } .l p, .l p a { font-size:14px;line-height:20px;font-weight: bold;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; } .m p, .m p a { font-size:13px;line-height:18px;font-weight:400;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; } .n p, .n p a { 
font-size:12px;line-height:17px;font-weight:400;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; } .p { background-color:#FFFFFF;max-width:520px;border:1px solid #E1E8ED;border:1px solid rgba(80, 80, 80, 0.3);border-radius:5px; } .q { font-size:16px;font-family:Helvetica,Roboto,Calibri,sans-serif !important;border:1px solid #e1e8ed;border:1px solid rgba(80, 80, 80, 0.3);border-radius:10px;background-color:#FFFFFF; } .q p { font-size:16px;font-family:system-ui,Helvetica,Roboto,Calibri,sans-serif !important;color:#222222;padding:4px 0; } .r { border:1px solid #E1E8ED !important;border-radius:5px; } .s p { font-size: 14px; line-height: 17px; font-weight: 400; color: #697882; text-decoration: none; } .t p { font-family:'Helvetica',Arial,sans-serif;font-size:12px;line-height:18px;font-weight:400;color:#000000;font-style:italic;padding:4px 0px 0px; } .v { border-radius:10px;border:solid 0px #DFD150;background-color:#2C81E5;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;color:#FFFFFF; } .v a { text-decoration:none;display:block;color:#FFFFFF; } .w p { font-size:12px;line-height:15px;font-weight:400;color:#FFFFFF; } .w p a { text-decoration: underline !important;color:#FFFFFF !important; } ul { font-family:'Helvetica',Arial,sans-serif;margin:0px 0px 0px 25px !important;padding:0px !important;color:#2D2D2D;line-height:24px;list-style:disc;font-size:16px; } ul > li { font-family:'Helvetica',Arial,sans-serif;margin:10px 0px 0px 0px !important;padding: 0px 0px 0px 0px !important; color: #2D2D2D; list-style:disc; } ol { font-family:'Helvetica',Arial,sans-serif;margin: 0px 0px 0px 25px !important;padding:0px !important;color:#2D2D2D;line-height:24px;list-style:decimal;font-size:16px; } ol > li { font-family:'Helvetica',Arial,sans-serif;margin:10px 0px 0px 0px !important;padding: 0px 0px 0px 0px !important; color: #2D2D2D; list-style:decimal; } .e h3, .e p, .e span { 
padding-bottom:0px;padding-top:0px;mso-margin-top-alt:0px;mso-margin-bottom-alt:0px; } .e span, .e li { font-family:'Helvetica',Arial,sans-serif;font-size:16px;color:#2D2D2D;line-height:24px; } .rec { font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji" !important; } .rec__button:hover { background-color: #f9fafb !important; } .copyright a {color: inherit !important; text-decoration: none !important; font-size: inherit !important; font-family: inherit !important; font-weight: inherit !important; line-height: inherit !important;} .txt_social p { padding: 0; word-break: break-all; } .table, .table-c, .table-h { border: 1px solid #C0C0C0; } .table-c { padding:5px; background-color:#FFFFFF; } .table-c p { color: #2D2D2D; font-family:'Helvetica',Arial,sans-serif !important;overflow-wrap: break-word; } .table-h { padding:5px; background-color:#F1F1F1; } .table-h p { color: #2A2A2A; font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif !important;overflow-wrap: break-word; } @media only screen and (max-width:667px) { .aa { width: 100% !important; } .bb img { width: 100% !important; height: auto !important; max-width: none !important; } .cc { padding: 0px 8px !important; } .ee { padding-top:10px !important;padding-bottom:10px !important; } .ff ul, .ff ol { margin: 0px 0px 0px 10px !important;padding: 0px !important; } .ff li { margin:10px 0px 0px 10px !important; } .r {height:140px !important;} .s p { font-size:13px !important;line-height:15px !important; } .mob-hide {display:none !important;} .mob-stack {display:block !important;width:100% !important;} .mob-w-full {width:100% !important;} .mob-block {display:block !important;} .embed-img {padding:0px 0px 12px 0px !important;} .socialShare {padding-top:15px !important;} .rec { padding-left:15px!important;padding-right:15px!important; } .bodyWrapper { 
padding:7px 4px 7px 4px !important; } .social-mobile {float:left !important;margin-top:10px !important;} } @media screen and (max-width: 480px) { u + .a .gg { width: 100% !important; width: 100vw !important; } .tok-heart { padding-top:75% !important; } .tok-play { padding-top: 250px !important; } } @media screen and (max-width: 320px) { .tok-heart { padding-top:65% !important; } } .u { border: 1px solid #CACACA !important; border-radius: 2px !important; background-color: #ffffff !important; padding: 0px 13px 0px 13px !important; font-family:ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif !important;font-size: 12px !important; color: #767676 !important; } .u a { text-decoration: none; display: block !important; color: #767676 !important; margin: 0px !important; } .u span, .u img { color: #767676 !important;margin:0px !important; max-height:32px !important;background-color:#ffffff !important; } </style><!--[if mso]><style type="text/css"> sup { font-size: 100% !important;vertical-align: .5em !important;mso-text-raise: -1.5% !important;line-height: 0 !important; } ul { margin-left:0px !important; margin-right:10px !important; margin-top:20px !important; margin-bottom:20px !important; } ul li { margin-left: 0px !important; mso-special-format: decimal; } ol { margin-left:0px !important; margin-right:10px !important; margin-top:20px !important; margin-bottom:20px !important; } ol li { margin-left: 0px !important; mso-special-format: decimal; } li.listItem { margin-left:15px !important; margin-top:0px !important; } .paddingDesktop { padding: 10px 0 !important; } .edm_outlooklist { margin-left: -20px !important; } .embedImage { display:none !important; } </style><![endif]--><style> @font-face { font-family: 'Open Sans'; font-style: normal; font-weight: 700; font-display: swap; src: url('https://fonts.gstatic.com/s/opensans/v40/memSYaGs126MiZpBA-UvWbX2vVnXBbObj2OVZyOOSr4dVJWUgsg-1x4gaVIUwaEQbjA.woff2') 
format('woff2'); } @font-face { font-family: 'Open Sans'; font-style: italic; font-weight: 700; font-display: swap; src: url('https://fonts.googleapis.com/css2?family=Open+Sans:ital,wght@1,700&display=swap') format('woff2'); } </style></head><body class="a" style="margin:0px auto;padding:0px;word-wrap:normal;word-spacing:normal;background-color:#dedede;"><div role="article" aria-roledescription="email" aria-label="email_name" lang="en" style="font-size:1rem"><div style="display:none;max-height:0px;overflow:hidden;"> Plus more about Phi-4-reasoning Technical Report and Softpick: No Attention Sink, No Massive Activations with Rectified Softmax  ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ </div><table role="none" width="100%" border="0" cellspacing="0" align="center" cellpadding="0" class="gg"><tr><td align="center" valign="top"><table role="none" width="670" border="0" cellspacing="0" cellpadding="0" class="aa" style="width:670px;table-layout:fixed;"><tr><td class="bodyWrapper" align="center" valign="top" style="padding:7px 7px 7px 7px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="center" valign="top" style="border-width:0px 0px 0px 0px;border-style: solid; border-color: #2a2a2a;border-radius:10px 10px 0px 0px;background-color:#ffffff;" class="c"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr id="header"><td style="padding:28px 28px 0px 28px;"><div style="padding-top:0px;padding-right:0px;padding-bottom:20px;padding-left:0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" 
align="center"><tr><td class="f" align="right" valign="top"><p> May 06, 2025 | <a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxSdB5RCIH6yy1Fm1CYma3EwMVqNcrjoP4XNE4y2uKAZlgyoAbav5fcIXpamYoFKeBSGxOmEL_igJ4qqgSy_nVnSGtWx-7kV9GJbPLsFrO714cITXngkTcpFlLTLsPIAKXM2MEXtWIddHj7Xj7oBumi5ZOcIYtpSxtyHdXo5kopsf3ptvE9ulmcg2DowsybTCm9RLbPgcfv-g3bwqmgS_2okjLMAeEPMW_tT6mPgoNQiwFfROx32H_LLHDwOVDeYtX1PxbNoDLhXM0JnV4bSlyIAK_IQ72l6WiFVcfC93mtnYQDMKKFDUTwGrSqFMV99O5qulyexiWPBT0vhYAdzr9qwhPffdDZMZvIAOxV9nHulmQdi80H9IVQtnFyg9x3bbf7XuhQP1CweDN3uxzbaJHciFfTbETj8SsFIz6eJVK-hv35lf4AIu24JSr-AvkvUUc-lSJNrX-6vDfJEULYvUYJBbgCZwtiqlpASkDfbZIqSRBxr1vdkV0oReVGHQQ-Qjs7v99ObiwX7CFjPRUjSZMlx-3cW7NIaYQ9QELQgzTF_wgqWyFeprxHY-KryoCdtKiHtYAWuvuJ40cWTaRhrKpCYZ7DbQHkOKdhfM8CmZiMPbEM1qQX5hI9cVz7J4qnllYnhe1xg2gLgJ2ULqmsbJfG8/4g9/JUGeMKnaREaBBfh30JSrTQ/h0/h001.MXnJfWQma0OlpOLwTDBkMNm8WhJat9owCJfVXn_Tbx8"><span class="translation_missing" title="translation missing: en.templates.posts.email.v3.header.read_online">Read Online</span></a></p></td></tr><tr><td class="dd" align="center" valign="top" style="padding:15px 0;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="center" valign="top"><h1 style="text-align:left;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;font-weight:Bold;font-size:32px;color:#2A2A2A;padding:2px 0;line-height:38px;"> The Leaderboard Illusion </h1><p style="text-align:left;font-family:'Helvetica',Arial,sans-serif;font-weight:normal;font-size:20px;color:#3E3E3E;padding:5px 0;line-height:24px;"> Plus more about Phi-4-reasoning Technical Report and Softpick: No Attention Sink, No Massive Activations with Rectified Softmax </p></td></tr></table></td></tr><tr><td style="height:0px;width:0px;"><div style="height:1px;" data-open-tracking="true"> <img src="https://elink4f7.mail.bycloud.ai/ss/o/u001.3wmUuY8gEWd4_869a_eXcg/4g9/JUGeMKnaREaBBfh30JSrTQ/ho.gif" alt="" 
width="1" height="1" border="0" style="height:1px !important;width:1px !important;border-width:0 !important;margin-top:0 !important;margin-bottom:0 !important;margin-right:0 !important;margin-left:0 !important;padding-top:0 !important;padding-bottom:0 !important;padding-right:0 !important;padding-left:0 !important;"/> </div></td></tr></table></div></td></tr><tr id="content-blocks"><td class="email-card-body" align="center" valign="top" style="padding-bottom:28px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td id="nov-18-th-nov-24-th-33-latest-ai-re" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h6 style="color:#2A2A2A;font-weight:normal;"><i>Apr 28th ~ May 4th</i><br><i>#54 Latest AI Research Explained Simply</i></h6></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"></p></td></tr></table></td></tr></table></td></tr><tr><td id="industry-news-in-1-line" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;">🗞️ Industry News in 1 Line</h2></td></tr><tr><td style="padding-bottom:12px;padding-left:50px;padding-right:40px;padding-top:12px;" class="ee"><div style="margin-left:0px;" class="edm_outlooklist"><ol start="1" style="list-style-type:decimal;margin:0px 0px;padding:0px 0px 0px 0px;"><li class="listItem ultext"><p style="line-height:24px;padding:0px;text-align:left;word-break:break-word;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;">♥ 452 </span></span> Allen 
Institute released <b>OLMo 2 1B</b>, a small yet powerful model trained on 4T tokens with advanced tuning methods, designed for efficient research iteration and outperforming peers like Gemma 3 1B and Llama 3.2 1B. Now available on <a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.CxDkkVpJsBdVoe83c_tBWi7X66MaTAPRoYh4-vrMs1rzlRp3XdIq4RIur3dtcazZ5jsAQrEtdRBpnd0CHVqhnOFBRlaNrfC-19xCv6O0T-p3a1paaCCIOncmEEw_9ntxlU1sfFhC8T7M2_0NnO8rNw/4g9/JUGeMKnaREaBBfh30JSrTQ/h1/h001.n1KMF_DY0n9nF9lsm3X4fZHPkIZNlVIiuyhRJn5dI88" target="_blank" rel="noopener noreferrer nofollow"><span>Huggingface</span></a>. </p><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:510px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/91ca83f1-8825-4bad-91df-c27d83b2cdb4/image.png?t=1746551756" alt="" height="auto" width="510" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:510px;"><p>OLMo 2 1B benchmark</p></td></tr></table></li><li class="listItem ultext"><p style="line-height:24px;padding:0px;text-align:left;word-break:break-word;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;">♥ 683 </span></span> Nous Research announced <a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.VomAAYwkCjux8i_FMc4kJaiJNraOmctPvBkHxTYEVrt_lZtonqST3ZuB_b9n3FNg_YW2QQ7nn9GtCo98uNxlQS8tClzrN3OWqgd5USx7L8D9jrBYFTd5ApwfB4G_6Vgq/4g9/JUGeMKnaREaBBfh30JSrTQ/h2/h001.8CCbsMYuJx_lF_IvCC9PAXun-EL9PJSWN-VeUmwcG8I" target="_blank" rel="noopener noreferrer nofollow"><span>Atropos</span></a>, a Language Model Reinforcement Learning Environments framework for collecting and evaluating LLM trajectories through diverse environments. 
Additionally, they are hosting a hackathon surrounding Atropos on <b>May 18th</b> with a <b>prize pool of $50,000 </b>in San Francisco. You can sign up <a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.ZsobsZmG6kUZ4LjqczYBVBAKm_vJJAKCAPAZX_R3CJ6Kj0mniXDt6EGFIiWJVTAuVVGMcWMmAViTu79FlpWreALYpJw_dORBR9Qig705cnkvrcrb8DTBpom1Jo3Eb98b6xhOLOwHMTQ_h-AIz_lOoXlQAVrPk9ECs02K3JeaNLblbICqYtXkVgwo55d_4Q9T/4g9/JUGeMKnaREaBBfh30JSrTQ/h3/h001.i2S9YmqLDt3Z-Bi5IDNHFx1uwnWtyql4lxkcIckB-cA" target="_blank" rel="noopener noreferrer nofollow"><span>here</span></a>. </p><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:420px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.ZsobsZmG6kUZ4LjqczYBVBAKm_vJJAKCAPAZX_R3CJ6Kj0mniXDt6EGFIiWJVTAuVVGMcWMmAViTu79FlpWreALYpJw_dORBR9Qig705cnlnbaezM_h93dkEpfc5qusfQHHsGs8HJalWnKSSz0AS4KIfO3spi_Px7yfcqsOJXn39RAaofPGyfTIRml6OYxkr/4g9/JUGeMKnaREaBBfh30JSrTQ/h4/h001.gzRt_1JdkVvGWbE4-eBtoWlRtBj96S3H1PlNCmFPS1Y" target="_blank" rel="noopener noreferrer nofollow" style="text-decoration:none;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/065c2fc5-8386-4213-816a-da0116b81c50/image.png?t=1746551386" alt="" height="auto" width="420" style="display:block;width:100%;" border="0"/></a></td></tr></table></li></ol></div></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="transparent" style="background-color:transparent;border-color:#2C81E5;border-style:solid;border-width:5px;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;"><span style="">LTX Video Release: Fastest & GPU-friendly Video Gen 
Model</span></h2></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"><span style="color:rgb(34, 34, 34);font-family:Georgia, 'Times New Roman', serif;font-size:16px;">LTX Studio has announced a new version of LTX Video, their latest video generation model which offers faster rendering performance, with up to</span><span style="color:rgb(34, 34, 34);font-family:Georgia, 'Times New Roman', serif;font-size:16px;"><b> 30x faster than competitors</b></span><span style="color:rgb(34, 34, 34);font-family:Georgia, 'Times New Roman', serif;font-size:16px;"> while maintaining high visual quality.</span></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"><span style="color:rgb(34, 34, 34);font-family:Georgia, 'Times New Roman', serif;font-size:16px;">The highlight of LTX Video is that they are entirely open-source under an </span><span style="color:rgb(34, 34, 34);font-family:Georgia, 'Times New Roman', serif;font-size:16px;"><b>OpenRAIL license</b></span><span style="color:rgb(34, 34, 34);font-family:Georgia, 'Times New Roman', serif;font-size:16px;"> (permitting free commercial use for businesses under $10M revenue) and are designed to run locally on consumer GPUs. 
</span></p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.-a_uZGcEKk2OBxkkwsTf3ifw2TtzAnLOnyIg8KXL_KMiJXJ1qARx7e-PqKlSfarBXwQVg1SbNM4ml-rV28fK45_IdYiF0YBkRumoWxzRlIpwFlDTLr-UIX-p41Dw8DHoIo-63bfvRlaqWt-P0XbMuyG_OYQVodE4Odm5WMXMGcAY5y8xBIO2pzuV_tTNk5M3KucBgF9dAoLsZ-xFc3k79mHRdDgLiTzRK8tR7Ck8cg5LpZuW_HoMZ0BxkeZ4C6Mz/4g9/JUGeMKnaREaBBfh30JSrTQ/h5/h001.T033cNh7QQkd9xx2K44Gvvulu9cnFZeZ39FaiAY_Ct4" rel="noopener noreferrer nofollow" style="text-decoration:none;" target="_blank"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/294fbc34-7065-4f32-b186-edb4903c45e2/LTXV_gif.gif?t=1746484342" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></a></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"><span style="">Key features include realistic multiscale rendering (this creates videos hierarchically, starting with a low-resolution structure and progressively refining it with finer details at higher resolutions) and creative controls like setting start and end keyframes via the LTX Studio platform. 
This provides a new option for efficient,</span><span style=""><b> locally run AI video creation</b></span><span style="">, compared to existing options.</span></p></td></tr><tr><td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.-a_uZGcEKk2OBxkkwsTf3ifw2TtzAnLOnyIg8KXL_KMiJXJ1qARx7e-PqKlSfarBXwQVg1SbNM4ml-rV28fK45_IdYiF0YBkRumoWxzRlIpwFlDTLr-UIX-p41Dw8DHoIo-63bfvRlaqWt-P0XbMuyG_OYQVodE4Odm5WMXMGcAY5y8xBIO2pzuV_tTNk5M3cvuMrXSsc4eP6Y7NHoSq9ljebR3oyJp5BPNEduDZ0m-8A14gBrdXp8WiwLTkqn0F/4g9/JUGeMKnaREaBBfh30JSrTQ/h6/h001.mE_-L_xLuY_CLFkNLsaiCLepgajln744y4MPAk0xrmQ" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"> Check Out LTX Studio </a></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"><span style=""><a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.tLfGW26lAwaS9gFg17HSoGymQ3NNPtd5dE5MV_8UgjIDFPVXngz8pvQBldSW42yhUe_Qiq6DgEPMEBuPL9yfRpXelTiuu2kS8pLFvsoem_XoZoy_n13sTKUhZIbl0VH6/4g9/JUGeMKnaREaBBfh30JSrTQ/h7/h001.CyoUR6C6jPshmHeEwLP9Sef2LoSvIeZMrLJyLmekLoo" target="_blank" rel="noopener noreferrer nofollow"><span>Advertise with The AI Timeline! 
</span></a></span></p></td></tr></table></td></tr></table></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"></p></td></tr></table></td></tr></table></td></tr><tr><td id="the-leaderboard-illusion" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;">The Leaderboard Illusion</h2></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"><span style=""><i>Singh et al. [Cohere, Princeton University, Stanford University, University of Waterloo, Massachusetts Institute of Technology, Allen Institute for Artificial Intelligence, University of Washington]</i></span></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;"> ♥ 4.3k </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="background-color:#e0e0e0;"><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> LLM Benchmarks </span></span></p></td></tr><tr><td id="the-hidden-biases-and-problems-in-l" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;"><span style="color:rgb(67, 67, 67);">The Hidden Biases and Problems in LLM Benchmarks </span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> New LLM 
research papers often use benchmarks to guide AI research, but what happens when the benchmarks themselves start pointing in misleading directions? You have probably already heard of Chatbot Arena, a crowdsourced platform where users compare anonymized model responses. Unlike static benchmarks, it adapts to real-world use cases. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> This paper shows that the <span style="font-weight:700;"><b>Chatbot Arena has systemic biases</b></span> that risk distorting the field’s perception of progress. From undisclosed private testing to unequal data access, the findings highlight how current practices favor a handful of major players, raising urgent questions about fairness and transparency in AI evaluation. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/fb7a0d26-9a2c-4e84-bed0-292792012938/image.png?t=1746546825" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>Maximum observed sampling rate for models from different providers.</p></td></tr></table></td></tr><tr><td id="how-the-chatbot-arena-system-skews-" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;"><span style="color:rgb(67, 67, 67);">How the Chatbot Arena System Skews Results</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 
28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> Chatbot Arena’s ranking system uses the Bradley-Terry (BT) model, which is a statistical method for estimating skill levels from pairwise comparisons. The BT model assumes that it will receive unbiased sampling and every model will have an equal chance to prove itself. But private testing violates this principle. For instance, Meta tested 27 private variants of Llama-4 before launch, and selectively reported the best-performing version. Simulations show that testing just 10 variants <span style="font-weight:700;"><b>inflates a model’s perceived skill</b></span> by ~100 points. This is like a runner secretly entering multiple aliases in a race and claiming the fastest time. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/2d76c5a5-8cf0-45f7-8f76-b2658b758beb/image.png?t=1746546895" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>Number of privately-tested models per provider based on random-sample battles (January – March 2025).</p></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> Additionally, the data access further exacerbates the problem because <span style="font-weight:700;"><b>proprietary models are sampled more frequently</b></span> and they appear in 
up to 34% of battles compared to 3–5% for open-source alternatives. This imbalance directly impacts the model’s performance on the benchmark. Models trained on Arena-specific data achieve up to 112% higher win rates, as they adapt to the platform’s unique distribution of prompts and preferences. Over time, this creates a self-reinforcing cycle: dominant models get more data, which sharpens their edge, while others fall further behind. </p></td></tr><tr><td id="how-to-reform-llm-benchmarks-for-fa" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;"><span style="color:rgb(67, 67, 67);">How to Reform LLM Benchmarks for Fairer Evaluations</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> This paper clearly showed us that proprietary models consistently outperform open-source counterparts on the leaderboard, but this gap shrinks when controlling for data access and testing advantages. For example, when identical open-weight models were submitted under different aliases, their scores varied by up to 5%, purely due to sampling randomness. Similarly, silent deprecation of 205 models (mostly open-source) further destabilizes rankings, violating BT’s assumptions and eroding trust. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/5db8c4fb-898c-4811-aec9-05d9f96a076c/image.png?t=1746547013" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> To address these issues, the authors of this paper propose the following reforms: </p></td></tr><tr><td style="padding-bottom:12px;padding-left:50px;padding-right:40px;padding-top:12px;" class="ee"><div style="margin-left:0px;" class="edm_outlooklist"><ol start="1" style="list-style-type:decimal;margin:0px 0px;padding:0px 0px 0px 0px;"><li class="listItem ultext"><p style="line-height:24px;padding:0px;text-align:left;word-break:break-word;"><span style="font-weight:700;"><b>Ban score retraction</b></span>: All private tests must be publicly logged to prevent cherry-picking. </p></li><li class="listItem ultext"><p style="line-height:24px;padding:0px;text-align:left;word-break:break-word;"><span style="font-weight:700;"><b>Limit private variants</b></span>: Cap submissions to 3 per provider to curb overtesting. </p></li><li class="listItem ultext"><p style="line-height:24px;padding:0px;text-align:left;word-break:break-word;"><span style="font-weight:700;"><b>Equalize data access</b></span>: Allocate battles and deprecations evenly across model types. 
</p></li><li class="listItem ultext"><p style="line-height:24px;padding:0px;text-align:left;word-break:break-word;"><span style="font-weight:700;"><b>Transparent sampling</b></span>: Publish deprecation lists and enforce fair sampling policies. </p></li></ol></div></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> You can also check out my video on LLM benchmark/leaderboard cheating. </p></td></tr><tr><td class="dd" align="center" valign="top" style="padding:20px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.amatuKKICSickUKplYJXmM_r5vF1sd4h3wB5275KGdsXaGoei9iKxljVWWd5arHxyV4hIfCbH-qptcHLWpgYwQsXE6_B1rZf29t3yMeaySqEEqK9ngoISg5nIPLXMFxt/4g9/JUGeMKnaREaBBfh30JSrTQ/h8/h001.tu-Cf21pYtRLmGaDavGp78uqv-MRP9yWqaT-f008AEA" style="text-decoration:none;"><table align="center" width="100%" cellpadding="0" cellspacing="0" border="0" role="none" style="max-width:520px;margin:0 auto;"><tr><td class="p" width="100%" style="padding:2px;border:none;"><table width="100%" cellpadding="0" cellspacing="0" border="0" role="none"><tr><td align="center" valign="top" style="width:100%;"><div style="max-height:0;position:relative;opacity:0.999;width:100%;mso-hide:all;"><div style="display:inline-block;width:100%;padding-top:25%;"><img width="20%" height="auto" loading="lazy" alt="" style="border:0;" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/youtube_play_icon.png"/></div></div><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.amatuKKICSickUKplYJXmM_r5vF1sd4h3wB5275KGdvRCzm4cNGLibEulWj0hu-bSma8-ELf1ikD66YD89cDbpMdq44fEvXNjbupHmLR1wi-Q6JSkF17VOnOI7NKVmwA/4g9/JUGeMKnaREaBBfh30JSrTQ/h9/h001.phNpTYP1M7Y48yKvo0iYkfe-yECmW4sI4PYA5hkSNBo" style="text-decoration:none;"><img src="https://i.ytimg.com/vi/IbBEbjeVWgI/maxresdefault.jpg" width="480" height="auto" loading="lazy" alt="YouTube video by bycloud" 
style="display:block;height:auto;border:0;outline:none;text-decoration:none;background-color:#000000;width:100%;"/></a></td></tr><tr><td><p style="font-size:12px;font-weight:500;font-style:italic;font-family:Helvetica, Calibri, sans-serif;color: #686a6d; padding-top:0 !important;padding-bottom:6px !important; padding-left:4px !important;"> Cheating LLM Benchmarks Is Easier Than You Think… </p></td></tr></table></td></tr></table></a></td></tr><tr><td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28yjf9KIXZdsXoh1WlHvvKndg2vzwC-9QzXrnn9CAX12RfwEklnBatGbCAaGPAIq6vBTh3YF1k9NqCXb9v-_VBTsOHnCjjkIYVSdoPD1k5RP/4g9/JUGeMKnaREaBBfh30JSrTQ/h10/h001.gFCOlLBv6G_oEDp78fjpsC516FfCKtkwl0wL4JtQTI4" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"> Read Full Paper </a></td></tr></table></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p 
style="line-height:24px;"></p></td></tr></table></td></tr></table></td></tr><tr><td id="phi-4-reasoning-technical-report" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;">Phi-4-reasoning Technical Report</h2></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"><i>Abdin et al. [Microsoft]</i></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;"> ♥ 1.4k </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="background-color:#e0e0e0;"><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> LLM Reasoning </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span></p></td></tr><tr><td id="how-phi-4-reasoning-competes-with-g" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;"><span style="color:rgb(67, 67, 67);">How Phi-4-Reasoning Competes with Giants</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> Large language models can do a lot but they also require a lot of computing power. But if we can build compact AI models that can solve complex problems without the computational requirements then it can be a game changer. Today’s frontier language models often rely on sheer size to tackle multi-step reasoning, but scaling parameters isn’t the only path forward. That’s why Microsoft built Phi-4-reasoning, a 14-billion-parameter model that prioritizes smarter training over brute-force scaling. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/6de2c096-ddb2-44d1-81ee-f6d4d74959a4/image.png?t=1746547068" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td id="inner-workings-and-training-of-phi-" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;"><span style="color:rgb(67, 67, 67);">Inner Workings and Training of Phi-4-Reasoning</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> The Phi-4-reasoning model starts with its base model, Phi-4, which already excels at factual recall and basic reasoning. To specialize it, researchers created a dataset of 1.4 million prompts filtered for “teachability”. These were specialized problems which were just beyond the model's current capabilities. These prompts consisted of math, coding, and safety-critical scenarios, and their answers were generated by OpenAI’s o3-mini to ensure high-quality reasoning traces. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/a9188436-324b-4389-9c0f-7f4158b30ada/image.png?t=1746547118" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>Rewriting seed data from the web (left) into verifiable synthetic questions for SFT and RL (right)</p></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> The supervised fine-tuning phase repurposes two tokens, <span style="color:rgb(24, 128, 56);">&lt;think&gt;</span> and <span style="color:rgb(24, 128, 56);">&lt;/think&gt;</span>, to structure the model’s internal reasoning. This simple formatting encourages the model to generate detailed chains of thought before final answers, mimicking how humans break down problems. When using this approach, training runs revealed an unexpected benefit: even without explicit guidance, the model began producing concise, verifiable solutions, which is useful for real-world applications. Finally, the researchers created a short reinforcement learning phase with 6,000 math-focused problems that pushed the model to generate longer, more precise reasoning traces. 
</p></td></tr><tr><td id="evaluating-phi-4-reasoning-model" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;"><span style="color:rgb(67, 67, 67);">Evaluating Phi-4-reasoning Model</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> In the AIME 2025 math competition (a gateway to the USA Math Olympiad), Phi-4-reasoning matches the performance of DeepSeek-R1, a model 48 times larger, and outperforms distilled versions like DeepSeek-R1-Distill-Llama-70B. In coding (LiveCodeBench), it beats its base model by 25 percentage points. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/0b28ec0e-d53b-4703-9fb3-f3e72e84813b/image.png?t=1746547179" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> Researchers often use small datasets for testing their models, where minor variations skew results. To combat this, the team tested across multiple runs, reported standard deviations, and used larger test sets. They also showed that a “parallel test-time compute” approach is a viable solution for such models. The researchers showed that generating many candidate solutions and picking the best (via majority vote) could push accuracy near theoretical ceilings. 
For example, with 64 parallel generations, Phi-4-reasoning-plus approaches 95% accuracy on AIME 2025 which surpasses even its teacher model, o3-mini. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/dd9f4a21-e9ee-42e0-84f0-5d4df6ae8315/image.png?t=1746547212" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>Average Pass@1 accuracy (%) of models on selected reasoning benchmarks.</p></td></tr></table></td></tr><tr><td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28yjf9KIXZdsXoh1WlHvvKlLKZs0WxCxt1Li-pQnEFfaH9qcuFhToCnaRJgWAAbj0n_83vc2yq2gqnLUhUu9eRQTHwM20QD-bOMlCyE5Eoht/4g9/JUGeMKnaREaBBfh30JSrTQ/h11/h001.MYQ5hzQ7mjd6F1aR4cddRIotZUSVwbBl_tHcPn-rSqo" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 
14px;text-decoration:none;"> Read Full Paper </a></td></tr></table></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"></p></td></tr></table></td></tr></table></td></tr><tr><td id="softpick-no-attention-sink-no-massi" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;">Softpick: No Attention Sink, No Massive Activations with Rectified Softmax</h2></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"><i>Zuhri et al. [MBZUAI]</i></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;"> ♥ 1.7k </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="background-color:#e0e0e0;"><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> LLM Attention </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="background-color:#e0e0e0;"><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> bycloud’s pick </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span></p></td></tr><tr><td id="introduction-to-the-softpick-softma" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;"><span style="color:rgb(67, 67, 67);">Introduction to the Softpick (Softmax’s Replacement)</span></h3></td></tr><tr><td 
class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> Transformers have become the backbone of modern AI, but they mainly use softmax in attention layers, which hides subtle quirks. One of the biggest issues is the <span style=""><i>attention sink</i></span> phenomenon, where models allocate disproportionate focus to tokens like the beginning-of-sequence (BOS) marker. Although they are harmless for performance, these sinks create <span style=""><i>massive activations</i></span>, which are extreme values in hidden states that complicate quantization and low-precision training. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/c1b140f5-adc9-4c96-8fc7-33cfe5422742/image.png?t=1746547303" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p> Figure 1: (left) Comparison between the attention maps when using softmax vs softpick and overall sink rate of the models. (right) Largest hidden state activation per layer of the models</p></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> The researchers of this paper introduce <span style="font-weight:700;"><b>Softpick</b></span>, a new normalization function designed to replace softmax. By relaxing softmax’s sum-to-one constraint and introducing sparsity, Softpick can eliminate these artifacts without sacrificing performance. 
</p></td></tr><tr><td id="inner-workings-of-softpick" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;"><span style="color:rgb(67, 67, 67);">Inner Workings of Softpick</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> Softpick re-creates the attention mechanism by decoupling normalization from strict probability constraints. Instead of exponentiating all inputs and normalizing to sum to one (as in softmax), Softpick applies a ReLU to shifted exponentials, then normalizes by the sum of their absolute values. This simple tweak allows attention scores to be zero for irrelevant tokens by creating sparse patterns. For example, if a token’s score is negative after shifting, it gets clipped to zero which effectively prunes its contribution to the output. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/af150382-9f32-4764-acd0-c6f3ded7419a/image.png?t=1746547358" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> This design preserves critical properties of softmax, such as bounded gradients and training stability, while avoiding pitfalls. Because the denominator includes absolute values, even negative-scoring tokens contribute to normalization which ensures gradients flow through all inputs. 
Moreover, this asymmetry breaks the sum-to-one requirement and eliminates the need for models to “waste” attention on sink tokens. This results in a self-regulating mechanism where heads can dynamically shut off (outputting zeros) when unused which reduces noise in hidden states. </p></td></tr><tr><td id="evaluation-of-softpick-normalizatio" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;"><span style="color:rgb(67, 67, 67);">Evaluation of Softpick Normalization</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> The researchers tested the Softpick Normalization approach on standard benchmarks like ARC-E and Wikitext and found that it matches or slightly outperforms softmax in accuracy and perplexity. Moreover, its real advantage emerges during quantization. At 2-4 bit precision, Softpick models retain significantly more performance than their softmax counterparts. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/1aa2e088-0586-4250-80b8-b151cc9282de/image.png?t=1746547576" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>Comparison of softpick vs softmax performance for HQQ quantization methods. 
( ↑= Higher is Better, ↓= Lower is Better, ∆= softpick - softmax)</p></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> When analysing the performance drops caused by quantization, the researchers found that Softpick has a 0% attention sink rate (vs. 33–63% for softmax) and hidden states with magnitudes reduced by an order of magnitude. These traits simplify low-precision training and sparsity optimizations. For example, dormant heads (those outputting zeros) could be pruned entirely which will save computation. The sparse attention maps also offer clearer interpretability, as zeroed scores highlight only relevant token interactions. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/9e6c9988-f4ab-4f1e-8abe-d53bc9fbb076/image.png?t=1746547503" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="line-height:24px;"> However, Softpick isn’t without limitations. In long-context tasks like passkey retrieval, its scores can become underscaled due to normalization over many tokens which weakens signal strength. If you want to experiment with quantization or sparse training, then you can use Softpick's drop-in solution today using the code in its GitHub repo. 
</p></td></tr><tr><td align="center" valign="top" style="padding:14px 32px 14px 32px;" class="dd"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.VomAAYwkCjux8i_FMc4kJURoWzGz830k6Z1SPnOWQJ-eBY-M4zK19TP2ZNi-HYD4T0UchrygPo0A927gBTNqJqAE1-kt7DDNCexYG8LVbVfHb6KTxbXNw_Wov1pf7Cfn9fzdgVfx8jvGTlAUBZx6wg/4g9/JUGeMKnaREaBBfh30JSrTQ/h12/h001.upf7XED97IwzQ04QVEtSC27PiSP30ZKacORS9-l69p0" style="text-decoration:none;" target="_blank"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center" style="margin-bottom:12px;margin-top:12px;padding-left:12px;padding-right:12px;"><tr><td align="center" valign="top" class="o" style="padding:12px 12px 12px 12px;;background-color:#FFFFFF;border-color:#F1F1F1;border-radius:5px 5px 5px 5px;border-width:1px 1px 1px 1px;"><table role="none" border="0" cellspacing="0" cellpadding="0" align="right" width="100%"><tr><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="embed-img" align="center" valign="top" style="width:100%;min-height:100px;vertical-align:middle;padding:0px 0px 12px 0px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.VomAAYwkCjux8i_FMc4kJURoWzGz830k6Z1SPnOWQJ-eBY-M4zK19TP2ZNi-HYD4vhvb-PvaeIMOBniw3NEIhftLukPhu3jvyG1RL847ihnSJV1Pn6bMu4-Xr2CjGP8-M_sj3eMRrwDb7uylC9WaQg/4g9/JUGeMKnaREaBBfh30JSrTQ/h13/h001.n9gRANefCEkwLONINFrNBcHNOFk3TRcRZTMIB2pVs_c" style="text-decoration:none;" target="_blank"><img src="https://opengraph.githubassets.com/d0cd34b0e4477331b6a25a449d5c8260ec9366981c5633ea11d0e3dcd30b45c1/zaydzuhri/softpick-attention" width="100%" style="display:block;"/></a></td><!--[if mso]></tr></table></td><![endif]--></tr><tr><td align="center" valign="top" class="cc"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="left" valign="top" class="l"><p>GitHub - zaydzuhri/softpick-attention: Implementations of attention with the softpick function, naive 
and FlashAttention-2</p></td></tr><tr><td align="left" valign="top" class="m"><p>Implementations of attention with the softpick function, naive and FlashAttention-2 - zaydzuhri/softpick-attention</p></td></tr></table></td></tr></table></td></tr></table></a></td></tr><tr><td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28yjf9KIXZdsXoh1WlHvvKmoglmsJ8CBMMepS8R_9EiUl2hq6GSSoYZDGDgLRVxqg-OxeDtMNJhFpajUlxKteZZH0YMVUj9uoHTvEzQgNF0p/4g9/JUGeMKnaREaBBfh30JSrTQ/h14/h001.ori-3fx0f3h8cNI1DSNwbv6fvAxRjvnENGQZ3c7X8rs" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"> Read Full Paper </a></td></tr></table></td></tr><tr><td class="dd" style="padding: 20px;"><table width="100%" cellpadding="0" cellspacing="0" role="none" style="max-width:520px;margin:0 auto;"><tr><td class="q" style="padding:16px 16px 6px 16px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.tLfGW26lAwaS9gFg17HSoDDFT6eh5Nsg0xYVQj-h6I3o9m2k79_qw4izMYhmcI36zRzvGzFi1iud6QJ-1zHz4ZPbkKFPlK3yHvHYiKrsMb29Cra6SKYeZE5AYaTaAQ4B0nZUP5eyFMlrrGiHxKZuaJek8gJ5tqgwNw2ZDms5sEY/4g9/JUGeMKnaREaBBfh30JSrTQ/h15/h001.vw83_SXdd1d8UsrMcLW7jQdYQ_Tp-BoTeT8H7j-e1MY" style="text-decoration:none !important;"><table width="100%" cellpadding="0" cellspacing="0" 
border="0" role="none"><tr><td width="100%" style="padding: 0 0 14px 0;text-decoration:none;width:100%;"><table width="100%" cellpadding="0" cellspacing="0" border="0" role="none"><tr><td width="36" style="width:36px;"><img src="https://pbs.twimg.com/profile_images/1698572487909400576/BvncwnrP_normal.jpg" alt="tw profile: The AI Timeline" style="display:block;width:36px;height:36px;border-radius:50%;border:0;"/></td><td width="400" style="padding:0 0 0 8px;text-decoration:none;"><span style="display:block;font-size:14px;color:#1c2022;font-weight:700;"> The AI Timeline </span><span style="display:block;color:#697882;font-size:14px;"> @TheAITimeline </span></td><td width="24" align="right" style="vertical-align:text-top;"><img width="24" height="24" loading="lazy" alt="tw" style="border:0;" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/x_logo.png"/></td></tr></table></td></tr><tr></tr><tr><td style="word-break:break-word;"><p>🚨This week's top AI/ML research papers:</p><p>- DeepSeek-Prover-V2 <br>- The Leaderboard Illusion <br>- Phi-4-reasoning Technical Report <br>- Mem0 <br>- X-Fusion <br>- Softpick <br>- RL for Reasoning in LLMs with One Training Example <br>- ReasonIR <br>- RL for LLM Reasoning Under Memory Constraints <br>-</p></td></tr><tr><td style="padding:12px 0 0 0;"></td></tr><tr><td align="center" style="padding:8px 0 0 0;width:480px;"><img src="https://pbs.twimg.com/media/GqI1ZFrW8AELxBX.jpg" width="480" height="auto" style="display:block;border:1px solid #E1E8ED;border-radius:5px;width:100%;max-width:480px;height:auto;"/></td></tr><tr><td height="8" style="line-height:1px;font-size:1px;height:8px;"> </td></tr><tr><td align="left" valign="top" class="s"><p>10:22 PM • May 4, 2025</p></td></tr><tr><td height="10" style="line-height: 1px; font-size: 1px; height: 10px;"> </td></tr><tr><td height="1" bgcolor="#e1e8ed" style="line-height:0px;font-size:0px;height:1px;"></td></tr><tr><td 
height="10" style="line-height:1px;font-size:1px;height:10px;">&nbsp;</td></tr>
<!-- Tweet engagement counts and the replies pill; the anchor-wrapped tweet card closes after these rows. -->
<tr><td align="left" valign="top" class="s"><p><b style="color:#1C2022">944</b> Likes <b style="color:#1C2022">85</b> Retweets </p></td></tr>
<tr><td align="left" valign="top" class="s"><div align="center" style="text-align:center;margin-top:4px;margin-bottom:4px;padding:8px;border:1px solid #ccd6dd;border-radius:9999px;color:#1B95E0"><b>9 Replies</b></div></td></tr>
</table></a></td></tr>
</table></td></tr>
</table></td></tr>
</table></td></tr>
<!-- Footer: dark (#2a2a2a) panel with rounded bottom corners. -->
<tr><td align="center" valign="top"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center">
<tr><td class="b" align="center" valign="top" bgcolor="#2a2a2a" style="padding:0px 0px 0px 0px;border-style:solid;border-width: 0px 0px 0px 0px;border-color: #2a2a2a;border-bottom-left-radius:10px;border-bottom-right-radius:10px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center">
<!-- Social bar: icon-only links, so each alt text names the link destination. -->
<tr><td align="center" valign="top" bgcolor="#73ddff" style="padding:12px"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center">
<tr><td><span style="padding-left:1px;"></span></td><td align="center" valign="middle" width="75" style="width:75px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.1muhFWIqieRYpaJ-FbWSCQqcWoV4NNHHr5SkP9THApWuHAAlWLQxI3Q_IqFmt_DcyAxeC8jDApCnHmMSBGpBb5sgtimvBYgxRX-Rp7s0F3LjCHoSwdhr83OBqRFhJ1y_/4g9/JUGeMKnaREaBBfh30JSrTQ/h16/h001.-QmbAvq6tYDkdduxg88a4Lc3WuwAcLIMZNouUfb2nxU" style="text-decoration:none;"><img width="22" height="22" alt="X (Twitter)" border="0" style="display:block;max-width:22px;" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/x_dark.png"/></a></td><td align="center" valign="middle" width="75" style="width:75px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.amatuKKICSickUKplYJXmBoQnQ9VXnB2zTxBG4HeHBgjMqVxpoXRdj01cjwyoVlHgiebEOgBvwHtevoVpsSvpn3Q1di2ml6sb3cBM-X6IStQbj_zQSVGWJ8AAmPw2en2/4g9/JUGeMKnaREaBBfh30JSrTQ/h17/h001.WgkZCTKx02KcjwlUiH-23w_xvMnLopIZqDq49tzVjiA" style="text-decoration:none;"><img width="22" height="16" alt="YouTube" border="0" style="display:block;max-width:22px;" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/youtube_dark.png"/></a></td><td><span style="padding-left:1px;"></span></td></tr>
</table></td></tr>
<tr><td height="10" style="line-height:1px;font-size:1px;height:10px;">&nbsp;</td></tr>
<!-- Subscription management link, copyright line and postal address. -->
<tr><td class="w" align="center" valign="top" style="padding:15px 15px 15px 15px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center">
<tr><td align="center" valign="top"><p style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"> Update your email preferences or unsubscribe <a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxWc4htTObwdorovK0nFHVH-4pUdVE0ELYH5DsNemk732SjNwhPNJ25r0O8B5vYifsBhEpz-DJgyVFmavJPa0OyKRRnvw4o7XGyvIv7PRofnmD6TplH7Tr3lO63GFhQz3o35cRKGZDTZyUDTBOqsNFpoBWJonUjwytN1_af-rvB4fDW7u6pIU_ft0xye12Hz9sdui8Cf8rclz6_nXD3wErxG31UiMlgQbZwCVXHJD8feuzeT7O6t3NdEeUOlqOsp7nE_uZFo_9n2pLBL6DHcDolGnh9ELH6RQEDn-sciK1cCKE8VhsZzhM84NxlPE4pXv_LQRqF1tvANXLXh_exuu-gSQs4rPzS-9zKB06xvKbK9T8IzsvkOSUWwi28U-BPAnTw9H_yaZbRdhOGoD8kr65kjzjz6uqGoWsMVHUitzUeRJfCcCGEEgqvkkO949PYfm42KW7CAf6FOF9CvWGG0z6Jr2_e1YWK2HUFq7Tls6_UAXG6YX0Crg9t5ifGHV5kmPIqwfvN7pGj3JMBJoGxc4rDI1aL5tJBp3c1NNP0o06GgKicH7R_Do9n7J7MmXgVHSOM5DJxSQXjwr2vkw9fc5Xaw5S7rElrMKJh2eTbSpsrjnQkfN8QVUuc1DsRC-siYywIV_fNyqUHN7u3WVdw2mdrxrzQ8L1M8N9BHvq_7g-xFPHRNSIUMHNvLPZLRFI-zQYWfUEHKCAmlbrLzMnMloP32NOuNZI9Frqk_T0lOtS0WZSRCjLpCq5jo_48ECL1oxja4F5TddBPkao4kLC2Oqxn4SUDwoI8_oqZllVIO4WCpDo8jrCdpXF4m-bpWF5Ff97Q/4g9/JUGeMKnaREaBBfh30JSrTQ/h18/h001.YfoBuDvHpBZExIVvGKrCbEhkAcH7atbF88YY1LBzh5Q" style="text-decoration:underline;text-decoration-color:#FFFFFF!important;color:#FFFFFF!important;"> here</a></p><p class="copyright" style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"> © 2025 bycloudai </p><p style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"> 228 Park Ave S, #29976, New York, New York 10003, United States </p></td></tr>
<!-- "Powered by beehiiv" badge. The display overrides are forced with !important on every level of this row. -->
<tr style="display: table-row !important;"><td align="center" valign="top" style="padding-top:20px;display:table-cell !important;"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="display:table !important;">
<tr style="display:table-row !important;"><td class="u" align="center" valign="middle" height="32" style="height:32px;display:table-cell !important; max-height: 32px !important;margin:0px !important; background-color: #ffffff !important;"><a style="line-height:32px !important;text-decoration:none;display:block !important;" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28olDWFpV5DDKfdk_OdOKOgW30cPnATMBg8yu-wXnQKRayO9Ge9x7ZeCOzpqHrDx-7NkTbcE28kSncn1fZWJMIQbsPZGSyyuuvd9w365r2F9etG4wNWL4n4tMJYtKb-xI24L5Dz-s4Odn1AeyzNDnLble9U3Xsvqru1YPjKsLSVSb7hKrK8iExquUiI6wubKD9z3ibtSzL-IEhnZ9Py5AGmuqgGrE8Es56VpktEJVh1b/4g9/JUGeMKnaREaBBfh30JSrTQ/h19/h001.rUtb4vVB4_SzvKxsYdWfWemk6ONJ5JpEEiAqL6fBmwI"><img src="https://media.beehiiv.com/output-onlinepngtools.png" width="16" alt="beehiiv logo" style="display:inline-block !important;max-width:16px !important; vertical-align:-3px !important;width: 16px !important;" border="0"/><span style="padding-left:11px !important;display: inline-block !important;">Powered by beehiiv</span></a></td></tr>
</table></td></tr>
<!-- Terms of Service link, set at 1px in the same colour as the footer background (#2a2a2a). -->
<tr><td align="left" valign="top" height="2" style="height:2px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.CxDkkVpJsBdVoe83c_tBWsHIaP4XNp0WgUYqLvHcKk_3uqk_KIkz4ddLinhFbud6JuxLFdSUhYnR7b1NSsmbtzXNGNblnEEMKUtkCAjkn8Y/4g9/JUGeMKnaREaBBfh30JSrTQ/h20/h001.9FACW3A1bj0pIHsmMJs3U4FFtFOjICJX3RHCQ30DuIc" style="color: #2a2a2a !important; cursor: default; font-size: 1px; text-decoration: none;"> Terms of Service </a></td></tr>
</table></td></tr>
</table></td></tr>
</table></td></tr>
</table></td></tr>
</table></td></tr>
</table></div></body></html>