<!DOCTYPE html><html lang="en" xmlns="http://www.w3.org/1999/xhtml" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" style="font-size:16px;"><head><meta charset="utf-8"/><!--[if !mso]><!--><meta http-equiv="X-UA-Compatible" content="IE=edge"/><!--<![endif]--><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="x-apple-disable-message-reformatting"/><meta name="format-detection" content="telephone=no,address=no,email=no,date=no,url=no"/><meta name="color-scheme" content="light"/><meta name="supported-color-schemes" content="light"/><title>Log-Linear Attention: In-between mamba & attention?</title><!--[if mso]><xml><o:OfficeDocumentSettings><o:AllowPNG/><o:PixelsPerInch>96</o:PixelsPerInch></o:OfficeDocumentSettings></xml><![endif]--><style> :root { color-scheme: light; supported-color-schemes: light; } body { margin: 0; padding: 0; min-width: 100%!important; -ms-text-size-adjust: 100% !important; -webkit-transform: scale(1) !important; -webkit-text-size-adjust: 100% !important; -webkit-font-smoothing: antialiased !important; } .body { word-wrap: normal; word-spacing:normal; } table.mso { width: 100%; border-collapse: collapse; padding: 0; table-layout: fixed; } img { border: 0; outline: none; } table { mso-table-lspace: 0px; mso-table-rspace: 0px; } td, a, span { mso-line-height-rule: exactly; } #root [x-apple-data-detectors=true], a[x-apple-data-detectors=true], #MessageViewBody a { color: inherit !important; text-decoration: inherit !important; font-size: inherit !important; font-family: inherit !important; font-weight: inherit !important; line-height: inherit !important; } span.MsoHyperlink { color: inherit !important; mso-style-priority: 99 !important; } span.MsoHyperlinkFollowed { color: inherit !important; mso-style-priority: 99 !important; } .a { background-color:#dedede; } .b { background-color:#2a2a2a; } .c { background-color:#ffffff; } .d { background-color:#fff0c8; } .d2 { 
background-color:#FFFFFF; } .d3 { background-color:#FFFFFF; } h1 a { text-decoration:none;color:#2C81E5;font-style:italic; } h2 a { text-decoration:none;color:#2C81E5;font-style:italic; } h3 a { text-decoration:none;color:#2C81E5;font-style:italic; } h4 a { text-decoration:none;color:#2C81E5;font-style:italic; } h5 a { text-decoration:none;color:#2C81E5;font-style:italic; } h6 a { text-decoration:none;color:#2C81E5;font-style:italic; } h1, h1 a, h2, h2 a, h3, h3 a, h4, h4 a, h5, h5 a, h6, h6 a, ul, li, ol, p, p a { margin: 0;padding: 0; } h1 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:700;font-size:28px;color:#2A2A2A;line-height:42px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h2 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:700;font-size:24px;color:#2A2A2A;line-height:36px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h3 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:20px;color:#2A2A2A;line-height:30px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h4 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:18px;color:#2A2A2A;line-height:27px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h5 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:16px;color:#2A2A2A;line-height:24px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } h6 { font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif;font-weight:400;font-size:14px;color:#2A2A2A;line-height:21px;padding-bottom:4px;padding-top:16px;mso-margin-top-alt:16px;mso-margin-bottom-alt:4px } p { font-family:'Georgia','Times New 
Roman',serif;font-weight:400;color:#2D2D2D;font-size:16px;line-height:24px;padding-bottom:8px;padding-top:8px;mso-margin-top-alt:8px;mso-margin-bottom-alt:8px; } p a, .e a, ul a, li a, .h a, .h2 a, .h3 a { word-break:break-word;color:#2C81E5 !important;text-decoration:none;font-style:italic; } p a span, .e a span, ul a span, li a span { color: inherit } p .bold { font-weight:bold;color:#2D2D2D; } p span[style*="font-size"] { line-height: 1.6; } .f p { font-size:12px;line-height:15px;color:#2D2D2D;padding:0; } .f p a { color:#2D2D2D !important; } .g p { font-family:'Helvetica',Arial,sans-serif;font-size:14px;line-height:20px;font-weight:normal;margin:0; } .g p a { text-decoration: underline; } .i p { font-family:'Helvetica',Arial,sans-serif;line-height:23px;font-size:15px;color:#2D2D2D; } .i p a { color:#2D2D2D !important; } .i2 p { font-family:'Helvetica',Arial,sans-serif;line-height:23px;font-size:15px;color:#2D2D2D; } .i2 p a { color:#2D2D2D !important; } .i3 p { font-family:'Helvetica',Arial,sans-serif;line-height:43px;font-size:24px;color:#2D2D2D; } .i3 p a { color:#2D2D2D !important; } .h p a { color:#595959 !important; } .h2 p a { color:#595959 !important; } .h3 p a { color:#595959 !important; } .f p a, .i p a, .i2 p a, .i3 p a, .h p a, .h2 p a, .h3 p a { text-decoration:underline; } .j { border-top:3px solid #ffeb2d; } .k p { padding-left:15px;padding-bottom:0px;padding-top:6px;mso-margin-top-alt:6px;mso-margin-bottom-alt:0px;mso-margin-left-alt:15px; } .o { background-color:#FFFFFF;border:1px solid #F1F1F1;border-radius:5px; } .o p { font-family:'Helvetica',Arial,sans-serif;padding:0px;margin:0px; } .l p, .l p a { font-size:14px;line-height:20px;font-weight: bold;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; } .m p, .m p a { font-size:13px;line-height:18px;font-weight:400;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; } .n p, .n p a { 
font-size:12px;line-height:17px;font-weight:400;color:#2D2D2D;padding-bottom:6px;mso-margin-bottom-alt:6px;text-decoration:none; } .p { background-color:#FFFFFF;max-width:520px;border:1px solid #E1E8ED;border:1px solid rgba(80, 80, 80, 0.3);border-radius:5px; } .q { font-size:16px;font-family:Helvetica,Roboto,Calibri,sans-serif !important;border:1px solid #e1e8ed;border:1px solid rgba(80, 80, 80, 0.3);border-radius:10px;background-color:#FFFFFF; } .q p { font-size:16px;font-family:system-ui,Helvetica,Roboto,Calibri,sans-serif !important;color:#222222;padding:4px 0; } .r { border:1px solid #E1E8ED !important;border-radius:5px; } .s p { font-size: 14px; line-height: 17px; font-weight: 400; color: #697882; text-decoration: none; } .t p { font-family:'Helvetica',Arial,sans-serif;font-size:12px;line-height:18px;font-weight:400;color:#000000;font-style:italic;padding:4px 0px 0px; } .v { border-radius:10px;border:solid 0px #DFD150;background-color:#2C81E5;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;color:#FFFFFF; } .v a { text-decoration:none;display:block;color:#FFFFFF; } .w p { font-size:12px;line-height:15px;font-weight:400;color:#FFFFFF; } .w p a { text-decoration: underline !important;color:#FFFFFF !important; } ul { font-family:'Helvetica',Arial,sans-serif;margin:0px 0px 0px 25px !important;padding:0px !important;color:#2D2D2D;line-height:24px;list-style:disc;font-size:16px; } ul > li { font-family:'Helvetica',Arial,sans-serif;margin:10px 0px 0px 0px !important;padding: 0px 0px 0px 0px !important; color: #2D2D2D; list-style:disc; } ol { font-family:'Helvetica',Arial,sans-serif;margin: 0px 0px 0px 25px !important;padding:0px !important;color:#2D2D2D;line-height:24px;list-style:decimal;font-size:16px; } ol > li { font-family:'Helvetica',Arial,sans-serif;margin:10px 0px 0px 0px !important;padding: 0px 0px 0px 0px !important; color: #2D2D2D; list-style:decimal; } .e h3, .e p, .e span { 
padding-bottom:0px;padding-top:0px;mso-margin-top-alt:0px;mso-margin-bottom-alt:0px; } .e span, .e li { font-family:'Helvetica',Arial,sans-serif;font-size:16px;color:#2D2D2D;line-height:24px; } .rec { font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji" !important; } .rec__button:hover { background-color: #f9fafb !important; } .copyright a {color: inherit !important; text-decoration: none !important; font-size: inherit !important; font-family: inherit !important; font-weight: inherit !important; line-height: inherit !important;} .txt_social p { padding: 0; word-break: break-all; } .table, .table-c, .table-h { border: 1px solid #C0C0C0; } .table-c { padding:5px; background-color:#FFFFFF; } .table-c p { color: #2D2D2D; font-family:'Helvetica',Arial,sans-serif !important;overflow-wrap: break-word; } .table-h { padding:5px; background-color:#F1F1F1; } .table-h p { color: #2A2A2A; font-family:'Trebuchet MS','Lucida Grande',Tahoma,sans-serif !important;overflow-wrap: break-word; } @media only screen and (max-width:667px) { .aa { width: 100% !important; } .bb img { width: 100% !important; height: auto !important; max-width: none !important; } .cc { padding: 0px 8px !important; } .ee { padding-top:10px !important;padding-bottom:10px !important; } .ff ul, .ff ol { margin: 0px 0px 0px 10px !important;padding: 0px !important; } .ff li { margin:10px 0px 0px 10px !important; } .r {height:140px !important;} .s p { font-size:13px !important;line-height:15px !important; } .mob-hide {display:none !important;} .mob-stack {display:block !important;width:100% !important;} .mob-w-full {width:100% !important;} .mob-block {display:block !important;} .embed-img {padding:0px 0px 12px 0px !important;} .socialShare {padding-top:15px !important;} .rec { padding-left:15px!important;padding-right:15px!important; } .bodyWrapper { 
padding:7px 4px 7px 4px !important; } .social-mobile {float:left !important;margin-top:10px !important;} } @media screen and (max-width: 480px) { u + .a .gg { width: 100% !important; width: 100vw !important; } .tok-heart { padding-top:75% !important; } .tok-play { padding-top: 250px !important; } } @media screen and (max-width: 320px) { .tok-heart { padding-top:65% !important; } } .u { border: 1px solid #CACACA !important; border-radius: 2px !important; background-color: #ffffff !important; padding: 0px 13px 0px 13px !important; font-family:ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif !important;font-size: 12px !important; color: #767676 !important; } .u a { text-decoration: none; display: block !important; color: #767676 !important; margin: 0px !important; } .u span, .u img { color: #767676 !important;margin:0px !important; max-height:32px !important;background-color:#ffffff !important; } </style><!--[if mso]><style type="text/css"> sup { font-size: 100% !important;vertical-align: .5em !important;mso-text-raise: -1.5% !important;line-height: 0 !important; } ul { margin-left:0px !important; margin-right:10px !important; margin-top:20px !important; margin-bottom:20px !important; } ul li { margin-left: 0px !important; mso-special-format: decimal; } ol { margin-left:0px !important; margin-right:10px !important; margin-top:20px !important; margin-bottom:20px !important; } ol li { margin-left: 0px !important; mso-special-format: decimal; } li.listItem { margin-left:15px !important; margin-top:0px !important; } .paddingDesktop { padding: 10px 0 !important; } .edm_outlooklist { margin-left: -20px !important; } .embedImage { display:none !important; } </style><![endif]--><style> @font-face { font-family: 'Open Sans'; font-style: normal; font-weight: 700; font-display: swap; src: url('https://fonts.gstatic.com/s/opensans/v40/memSYaGs126MiZpBA-UvWbX2vVnXBbObj2OVZyOOSr4dVJWUgsg-1x4gaVIUwaEQbjA.woff2') 
format('woff2'); } @font-face { font-family: 'Open Sans'; font-style: italic; font-weight: 700; font-display: swap; src: url('https://fonts.googleapis.com/css2?family=Open+Sans:ital,wght@1,700&display=swap') format('woff2'); } </style></head><body class="a" style="margin:0px auto;padding:0px;word-wrap:normal;word-spacing:normal;background-color:#dedede;"><div role="article" aria-roledescription="email" aria-label="email_name" lang="en" style="font-size:1rem"><div style="display:none;max-height:0px;overflow:hidden;"> Dive into AI's latest breakthroughs: Qwen Team, MIT-Princeton research, and cutting-edge insights from top research institutions like Qwen Team, MIT-Princeton research in this week's AI Timeline...  ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ </div><table role="none" width="100%" border="0" cellspacing="0" align="center" cellpadding="0" class="gg"><tr><td align="center" valign="top"><table role="none" width="670" border="0" cellspacing="0" cellpadding="0" class="aa" style="width:670px;table-layout:fixed;"><tr><td class="bodyWrapper" align="center" valign="top" style="padding:7px 7px 7px 7px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="center" valign="top" style="border-width:0px 0px 0px 0px;border-style: solid; border-color: #2a2a2a;border-radius:10px 10px 0px 0px;background-color:#ffffff;" class="c"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr id="header"><td style="padding:28px 28px 0px 28px;"><div style="padding-top:0px;padding-right:0px;padding-bottom:20px;padding-left:0px;"><table 
role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td class="f" align="right" valign="top"><p> June 10, 2025 | <a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxSdB5RCIH6yy1Fm1CYma3EyQ1ghLNC_FSMc3MrCHqQeQ8vEciTIXgVdmvh2TKPyxLOx-fZYZWAPkjonsJ-X80PSXpvFQDMy-wzxzXrWCsoDAbplbWMRV52E005GjVTpK8cM3KjXKao-0MPLTgOMnvb3RnnXRs65vsJQYkoTUem8J9ZGcXjpG271W17G9FodUhnNdpJ3j46DiP8AUInDO6Qs/4h8/qG-VYipiQRaVgzjHnqTHzg/h0/h001.2P6Bz-eTG7ff-qDdUi3K-ml_z-B-P9YNqu43FonkqH8">Read Online</a></p></td></tr><tr><td class="dd" align="center" valign="top" style="padding:15px 0;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="center" valign="top"><h1 style="text-align:left;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;font-weight:Bold;font-size:32px;color:#2A2A2A;padding:2px 0;line-height:38px;"> Log-Linear Attention: in-between of mamba & attention? </h1><p style="text-align:left;font-family:'Helvetica',Arial,sans-serif;font-weight:normal;font-size:20px;color:#3E3E3E;padding:5px 0;line-height:24px;"> Dive into AI's latest breakthroughs: Beyond 80/20 Rule, How much do language models memorize, and cutting-edge insights from top research institutions like Qwen Team, MIT-Princeton research in this week's AI Timeline update. 
</p></td></tr></table></td></tr><tr><td style="height:0px;width:0px;"><div style="height:1px;" data-open-tracking="true"> <img src="https://elink4f7.mail.bycloud.ai/ss/o/u001.3wmUuY8gEWd4_869a_eXcg/4h8/qG-VYipiQRaVgzjHnqTHzg/ho.gif" alt="" width="1" height="1" border="0" style="height:1px !important;width:1px !important;border-width:0 !important;margin-top:0 !important;margin-bottom:0 !important;margin-right:0 !important;margin-left:0 !important;padding-top:0 !important;padding-bottom:0 !important;padding-right:0 !important;padding-left:0 !important;"/> </div></td></tr></table></div></td></tr><tr id="content-blocks"><td class="email-card-body" align="center" valign="top" style="padding-bottom:28px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td id="nov-18-th-nov-24-th-33-latest-ai-re" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h6 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:87.5%;"><i>June 2nd ~ June 9th</i><br><i>#59 Latest AI Research Explained Simply</i></h6></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"></p></td></tr></table></td></tr></table></td></tr><tr><td id="industry-news-in-1-line" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;">🗞️ Industry News in 1 Line</h2></td></tr><tr><td style="padding-bottom:12px;padding-left:50px;padding-right:40px;padding-top:12px;" class="ee"><div style="margin-left:0px;" class="edm_outlooklist"><ol start="1" 
style="list-style-type:decimal;margin:0px 0px;padding:0px 0px 0px 0px;"><li class="listItem ultext"><p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;">♥ 1.4k</span></span> The <a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.wcXdj6dB6nd1Cx4inzJNk_td6EHXs7wvRPySUkyhYFdm3BCjWE1GpZJhykHz3urKM_U8kKoC9cl0_QxVckAExv_a0DKtABLWrbq_qyAeupx7cB5Q0N4SE1kFyGqFtCECHWPETz9GOXyWlenGDSdyUQ/4h8/qG-VYipiQRaVgzjHnqTHzg/h1/h001.hk3hjGvY4IGFPGulhDb0fkLBEqZl8K_sXSajEhPae4E" target="_blank" rel="noopener noreferrer nofollow"><span>Qwen3 Embedding and Reranker series</span></a> introduce multilingual text embedding and relevance-ranking models (available in 0.6B/4B/8B sizes) built on the Qwen3 LLMs, achieving state‑of‑the‑art performance across benchmarks </p><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:510px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/92f8ca29-73db-402d-a924-da33dbfa3383/image.png?t=1749573242" alt="" height="auto" width="510" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:510px;"><p>Qwen-3 Embedding & Reranker benchmarks</p></td></tr></table></li><li class="listItem ultext"><p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;">♥ 4.1k</span></span> <a class="link" 
href="https://elink4f7.mail.bycloud.ai/ss/c/u001.oB7zuO_W-X4Toa45C28ng3N7PLrWqoSDhsskwKrBQeiF5FxniVLW1TvSb0TUwtAMfLKeOB9AyAxIzrt7DMxyDaEglTyAowBO5D1r8aZ37NbTjEqoSmWCWXnLrBX3g9ks5GUFKz4QHKNCinhDOeGWDlzoW698WS7-ufgW6BUQFKA/4h8/qG-VYipiQRaVgzjHnqTHzg/h2/h001.Dn8Ra8CjJrw1Z-yKbypYClsIjKmilihgN1SXW64yJoA" target="_blank" rel="noopener noreferrer nofollow"><span>Google just upgraded Gemini 2.5 Pro</span></a> (Gemini-2.5-pro-0605), showing substantial Elo gains on LMArena (+24) and WebDevArena (+35) while maintaining top performance in coding (Aider Polyglot) and reasoning benchmarks like GPQA and HLE. Pretty much a new SoTA. </p><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:450px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/6151c60f-d693-46bf-ad75-97ca3577be4d/gemini_2-5_procomp_benchmarks_cropped_light_3.gif?t=1749573294" alt="" height="auto" width="450" style="display:block;width:100%;" border="0"/></td></tr></table></li><li class="listItem ultext"><p style="mso-line-height-alt:150.0%;padding:0px;text-align:left;word-break:break-word;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;">♥ 1k</span></span> <a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.9ggl6Mt0xphuuMReR5gVpc5v7AUulOrLyKBZrKRu_nx9kVi5XHNj8HazozO3qrJ6oaNGkouS5226oVvc-vYDyykOW-L-XfaJFq7-Wea5sS_dZAjZxcwbq46ENkLZZSLitpCMh8veeuynJUaGw3epx5Zr1vO2MB0-cMiRSn55GvE/4h8/qG-VYipiQRaVgzjHnqTHzg/h3/h001.EqzNAWuTD8jWV3mqy0ScN2muwSF1OM8uNOHl5q4sik0" target="_blank" rel="noopener noreferrer nofollow"><span>Apple released Foundation Models framework</span></a> that delivers an on-device 3B-parameter LLM for iOS 26+ platforms, enabling private, offline execution of tasks like summarization, dialog, and entity extraction, while supporting structured output via @Generable and dynamic function 
execution through tool calling. </p><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:540px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/d1c0afcf-36ed-445a-a541-03a02928b822/image.png?t=1749573796" alt="" height="auto" width="540" style="display:block;width:100%;" border="0"/></td></tr></table></li></ol></div></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"></p></td></tr></table></td></tr></table></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="transparent" style="background-color:transparent;border-color:#2C81E5;border-style:solid;border-width:5px;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;"><span style="">The AI Timeline: Premium Insights</span></h2></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="color:rgb(34, 34, 34);font-family:Georgia, 'Times New Roman', serif;font-size:16px;">Recently, we introduced a premium membership for The AI Timeline! 
</span></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="color:rgb(34, 34, 34);font-family:Georgia, 'Times New Roman', serif;font-size:16px;">With the membership, you would receive exclusive insights/explainers into technical AI topics and monthly research trend reports that contain my analysis of up to 40+ papers.</span></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="color:rgb(34, 34, 34);font-family:Georgia, 'Times New Roman', serif;font-size:16px;">Check out the </span><span style="color:rgb(34, 34, 34);font-family:Georgia, 'Times New Roman', serif;font-size:16px;"><b>Monthly Research Reports (~4000 words) </b></span><span style="color:rgb(34, 34, 34);font-family:Georgia, 'Times New Roman', serif;font-size:16px;">here:</span></p></td></tr><tr><td align="center" valign="top" class="dd"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxSdB5RCIH6yy1Fm1CYma3EzRMZU7syrc4B7ZADDNO77fF1CP9dNVR_P9aEWpRp8MUJs4-GxSOtnm-_GrfclATvzA6KgzXx0OKmlBOTkzUXcWuUvIqb69Luo8oST5epIEF1L5eLtOK2SFiI5C3CpPyH0avHtIcjydMEYP6eyBURSbDfnnuA9osaGMQRNSF7ArbUg_30RoalasV_1HB-Th320/4h8/qG-VYipiQRaVgzjHnqTHzg/h4/h001.1d7soAdDEE61Cg4FA3kB3bFv1IpTPyvmrRQ--25_7Uk" style="text-decoration:none;" target="_blank"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center" style="margin-bottom:12px;margin-top:12px;padding-left:12px;padding-right:12px;"><tr><td align="center" valign="top" class="o" style="padding:12px 12px 12px 12px;;background-color:#FFFFFF;border-color:#F1F1F1;border-radius:5px 5px 5px 5px;border-width:1px 1px 1px 1px;"><table role="none" border="0" cellspacing="0" cellpadding="0" align="right" width="100%"><tr><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" 
style="display:none;"><tr><![endif]--><td class="embed-img mob-stack" align="center" valign="top" style="width:35%;min-height:100px;vertical-align:middle;display:none;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxSdB5RCIH6yy1Fm1CYma3EzRMZU7syrc4B7ZADDNO77fF1CP9dNVR_P9aEWpRp8MUEodPNBwgKzPBQvZX63W3wlH_CQ84U-MKW9KM6tyxKLmBqgYIZojPTieGcmRSpceKN0OtF2rMRFtCnYPgtLRe1YWkKXxRG4Fofg3CVEiuOUWLPgpdlQ-pF8ZbfWWi9at-UtKz75svjNA-tiO3Trin5w/4h8/qG-VYipiQRaVgzjHnqTHzg/h5/h001.ot7neDAeAfMOjg9eXjJOTt3Si7WoF1E_KqYlV3zP9aU" style="text-decoration:none;" target="_blank"><img src="https://beehiiv-images-production.s3.amazonaws.com/uploads/asset/file/ec385139-4e87-42bb-933d-9a33db719a7f/May_2025_research_trend_report.jpg?t=1748926968" width="100%" style="display:block;"/></a></td><!--[if mso]></tr></table></td><![endif]--></tr><tr><td align="center" valign="top" class="cc"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="left" valign="top" class="l"><p>May 2025 Research Trend Report</p></td></tr><tr><td align="left" valign="top" class="m"><p>Premium Insights: A recap of popular AI research papers and research trends in May 2025 </p></td></tr><tr><td align="left" valign="bottom" class="n" style="vertical-align:bottom;padding-top:12px;"><p style="word-break:break-word;">mail.bycloud.ai/p/may-2025-research-trend-report</p></td></tr></table></td><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="mob-hide" align="center" valign="top" style="width:35%;min-height:100px;padding:0px 0px 0px 12px;vertical-align:middle;"><img src="https://beehiiv-images-production.s3.amazonaws.com/uploads/asset/file/ec385139-4e87-42bb-933d-9a33db719a7f/May_2025_research_trend_report.jpg?t=1748926968" width="100%" style="display:block;"/></td><!--[if mso]></tr></table></td><![endif]--></tr></table></td></tr></table></a></td></tr><tr><td 
align="center" valign="top" class="dd"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxSdB5RCIH6yy1Fm1CYma3EyrGVaD60VU8Ea8jPlR3ZXHnN1k6WLkfvEssMSV0_t-z1M0VorNEeRwTI9tNvnEiVwQmGyi4D-bjMAr651Z9G51LyzQJAn9w_JYoOpW_OrqsJ0bMl-8OjrYGPM5GzHkwdK0Wo4WA27-T85CAJGv6M4iM8vfZCCbRFQg4gANXHheLJM_PtktEp5qH8ESP6gIbkpBDpt6SNT3Vdhbpx-GHSYR/4h8/qG-VYipiQRaVgzjHnqTHzg/h6/h001.aL4mGYOX5BZ0F6vWOYDH0pslQY2FD3i9Ps-AZp9xsXo" style="text-decoration:none;" target="_blank"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center" style="margin-bottom:12px;margin-top:12px;padding-left:12px;padding-right:12px;"><tr><td align="center" valign="top" class="o" style="padding:12px 12px 12px 12px;;background-color:#FFFFFF;border-color:#F1F1F1;border-radius:5px 5px 5px 5px;border-width:1px 1px 1px 1px;"><table role="none" border="0" cellspacing="0" cellpadding="0" align="right" width="100%"><tr><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="embed-img mob-stack" align="center" valign="top" style="width:35%;min-height:100px;vertical-align:middle;display:none;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxSdB5RCIH6yy1Fm1CYma3EyrGVaD60VU8Ea8jPlR3ZXHnN1k6WLkfvEssMSV0_t-z-lmXBdKFKjRwq2kFXyp51Lu1ZgIAkyS2J97fyqD3nnZLUIEjZngBvA3LcBOYiXMleMh4QPr7b5eobxnlXkA8CRBfwTXoaq89ydaGPRmnpDpEGjBqxrwIjS4KeeVWGD_7jJ6XF18jLlKMMIfnD9X7opWosk302hTsiiTphydFGmO/4h8/qG-VYipiQRaVgzjHnqTHzg/h7/h001.f_QCI2hTiBw1fyVHIeuejlDgmkNSR14zJW9gvqUXFOQ" style="text-decoration:none;" target="_blank"><img src="https://beehiiv-images-production.s3.amazonaws.com/uploads/asset/file/36ff4cc5-7137-481e-bcf4-5d7b047b481b/april_2025_research_trend_report.jpg?t=1745891622" width="100%" style="display:block;"/></a></td><!--[if mso]></tr></table></td><![endif]--></tr><tr><td align="center" valign="top" class="cc"><table role="none" width="100%" border="0" cellspacing="0" 
cellpadding="0" align="center"><tr><td align="left" valign="top" class="l"><p>April 2025 Research Trend Report</p></td></tr><tr><td align="left" valign="top" class="m"><p>Premium Insights: A recap of popular AI research papers and research trends in April 2025</p></td></tr><tr><td align="left" valign="bottom" class="n" style="vertical-align:bottom;padding-top:12px;"><p style="word-break:break-word;">mail.bycloud.ai/p/april-2025-research-trend-report</p></td></tr></table></td><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="mob-hide" align="center" valign="top" style="width:35%;min-height:100px;padding:0px 0px 0px 12px;vertical-align:middle;"><img src="https://beehiiv-images-production.s3.amazonaws.com/uploads/asset/file/36ff4cc5-7137-481e-bcf4-5d7b047b481b/april_2025_research_trend_report.jpg?t=1745891622" width="100%" style="display:block;"/></td><!--[if mso]></tr></table></td><![endif]--></tr></table></td></tr></table></a></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="color:rgb(34, 34, 34);font-family:Georgia, 'Times New Roman', serif;font-size:16px;">Deep Dive Blogs:</span></p></td></tr><tr><td align="center" valign="top" class="dd"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxSdB5RCIH6yy1Fm1CYma3ExcsBUgAwWgXosV3JvU2U4ccfv2_iZAhfyaY-LUNmY0abroQzjZ02uUXtV3AGexpmKafSCLddktutztevWq0nTorSNeKd-9NiHH16n22a8guQ_1LA3kNxD_9fkfBIgYXgmTRolJbeDQVuGsptLajxbn1fGusudE4tP-Ppgk9MXIHgAFbyLZwYmwXJ4ho7vmf8rJvld2WSSjsNz0OF9oVCvF/4h8/qG-VYipiQRaVgzjHnqTHzg/h8/h001.n8tS2yNdibF2-qPkEH8ysOxEOX4m7xjLw1MKoLqA7GM" style="text-decoration:none;" target="_blank"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center" style="margin-bottom:12px;margin-top:12px;padding-left:12px;padding-right:12px;"><tr><td align="center" 
valign="top" class="o" style="padding:12px 12px 12px 12px;;background-color:#FFFFFF;border-color:#F1F1F1;border-radius:5px 5px 5px 5px;border-width:1px 1px 1px 1px;"><table role="none" border="0" cellspacing="0" cellpadding="0" align="right" width="100%"><tr><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="embed-img mob-stack" align="center" valign="top" style="width:35%;min-height:100px;vertical-align:middle;display:none;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxSdB5RCIH6yy1Fm1CYma3ExcsBUgAwWgXosV3JvU2U4ccfv2_iZAhfyaY-LUNmY0abroQzjZ02uUXtV3AGexpmIxfUsUcsRGviET-JJwDr9Td9nUhCNJohdEp3v_i95-msYaKXiz1cyXq-EmpqHdTpTA7LRPiZZvCPrb0wQGdeLSlJM_o4FC_8MwdCo6O2DH82Ip4O4WrSadR4qLzfx4KWyiHZ1uyWuH_Bosaly9i3j_/4h8/qG-VYipiQRaVgzjHnqTHzg/h9/h001._QytFRl1DN3VYQqTFMAWcC0eE0Rids1MNX7G9Z5sCEI" style="text-decoration:none;" target="_blank"><img src="https://beehiiv-images-production.s3.amazonaws.com/uploads/asset/file/6159b10f-6f8a-4840-b15d-82d1739d3054/premium_insights_deepseek-prover.jpg?t=1747675392" width="100%" style="display:block;"/></a></td><!--[if mso]></tr></table></td><![endif]--></tr><tr><td align="center" valign="top" class="cc"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="left" valign="top" class="l"><p>How DeepSeek Made The Best Math Prover Ever (+500% vs prev. 
SoTA)</p></td></tr><tr><td align="left" valign="top" class="m"><p>Premium Insights: A closer look into the DeepSeek Prover series </p></td></tr><tr><td align="left" valign="bottom" class="n" style="vertical-align:bottom;padding-top:12px;"><p style="word-break:break-word;">mail.bycloud.ai/p/how-deepseek-made-the-best-math-prover-ever</p></td></tr></table></td><!--[if mso]><td width="0"><table cellpadding="0" cellspacing="0" border="0" role="presentation" style="display:none;"><tr><![endif]--><td class="mob-hide" align="center" valign="top" style="width:35%;min-height:100px;padding:0px 0px 0px 12px;vertical-align:middle;"><img src="https://beehiiv-images-production.s3.amazonaws.com/uploads/asset/file/6159b10f-6f8a-4840-b15d-82d1739d3054/premium_insights_deepseek-prover.jpg?t=1747675392" width="100%" style="display:block;"/></td><!--[if mso]></tr></table></td><![endif]--></tr></table></td></tr></table></a></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="color:rgb(34, 34, 34);font-family:Georgia, 'Times New Roman', serif;font-size:16px;">Plus, we are scheduling a technical explainer on what FP8 is for later this week, so subscribe now to stay tuned!</span></p></td></tr><tr><td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"><a
href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxcTFNXiisgoxAScOL72N4DHVytoeFQ-mBBAkS5Q6m2B0JsVl7Ceocxpip3rVxsdDNU-abgLsphn3E_51ocYe78bpPmEhUU8HruSVyMtxjK0WyvPFT05xO4_m-Ra5-m3Np3FHul3qddDf3YvY4jz_FsczquVyxupkycK2Qwxd1YVCuKcJWv6o3AYjF2_41WATSg/4h8/qG-VYipiQRaVgzjHnqTHzg/h10/h001.r1SGU-O_5Gho3DbpYYqaP3YIB2Rey2qA0Gen09Sj3TQ" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"> Upgrade To Premium Insights </a></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style=""><sub><a class="link" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.tLfGW26lAwaS9gFg17HSoGymQ3NNPtd5dE5MV_8UgjIDFPVXngz8pvQBldSW42yhUe_Qiq6DgEPMEBuPL9yfRpXelTiuu2kS8pLFvsoem_XoZoy_n13sTKUhZIbl0VH6/4h8/qG-VYipiQRaVgzjHnqTHzg/h11/h001.fBuzLCApc5-4w3mDZpwhfZMOp2HZhAEWEy6J_3QlxfA" target="_blank" rel="noopener noreferrer nofollow"><span>Advertise with The AI Timeline!</span></a></sub></span></p></td></tr></table></td></tr></table></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"></p></td></tr></table></td></tr></table></td></tr><tr><td id="log-linear-attention" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;">Log-Linear Attention </h2></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p 
style="mso-line-height-alt:150.0%;"><span style=""><i>Guo et al. [Massachusetts Institute of Technology, Princeton University, Together AI, Carnegie Mellon University]</i></span></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;"> ♥ 1.4k </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="background-color:#e0e0e0;"><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> LLM Attention </span></span></p></td></tr><tr><td id="efficient-sequence-modeling-with-lo" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Efficient Sequence Modeling with Log-Linear Attention</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Attention mechanisms in Transformers are essential for modeling sequences, but they also come with significant challenges. First of all, they incur quadratic compute and linear memory costs that grow with sequence length. Although linear attention and state-space models offer linear-time alternatives, they rely on a fixed-size hidden state, which fundamentally limits their ability to capture extensive context. This degrades their performance in long-context scenarios, such as associative recall tasks. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The researchers of this paper have introduced <span style=""><i>log-linear attention</i></span>, which is a novel approach that bridges the gap between efficiency and expressiveness.
Instead of using a single hidden state, it maintains a logarithmically growing set of states, and enables richer context modeling without sacrificing hardware-friendly parallelism. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/503f1f4d-0154-4d1e-8615-633c48af3d92/recurrent.png?t=1749567587" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>Log Linear attention</p></td></tr></table></td></tr><tr><td id="how-log-linear-attention-works" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">How Log-Linear Attention Works</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The Log-linear attention mechanism uses a hierarchical partitioning scheme inspired by Fenwick trees. For each token position, the input sequence is divided into disjoint buckets of power-of-two lengths, prioritizing fine-grained resolution for recent tokens and coarser summaries for distant ones. This structure ensures the number of hidden states grows logarithmically with sequence length. 
</p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Additionally, each bucket contributes to the output via a data-dependent scalar weight λ, projected from the input. These weights allow the model to adaptively blend information across temporal scales. For instance, recent context might dominate via larger λ values for finer buckets, while older context is compressed into broader summaries. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/2325a757-a0f9-46b5-a900-fc6acd651086/image.png?t=1749567676" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> During inference, the model updates states incrementally. When processing a new token, it inserts the current memory into the finest bucket (level 0). Buckets up to a dynamically determined level are merged and promoted to coarser resolutions, maintaining only O(log T) active states. This enables decoding with logarithmic memory and logarithmic time per step. For training, a parallel algorithm decomposes computations into intra-chunk and inter-chunk phases. Intra-chunk interactions use standard matrix multiplications, while inter-chunk dependencies leverage hierarchical scans (applying existing linear-attention primitives across chunks). The result is O(T log T) time complexity, optimized for modern hardware through matmul-rich operations.
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/ecb9b01b-04d5-46cb-85fd-88623c9ea690/image.png?t=1749567732" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>Chunk-wise algorithm for decomposition of matrix.</p></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> This framework generalizes linear attention variants like Mamba-2 and Gated DeltaNet. By composing their structured masking matrices with the log-linear hierarchy, these models gain multi-scale memory without altering their core interaction mechanics. For example, log-linear Mamba-2 retains its data-dependent gating but attends to logarithmic states instead of a single fixed state. </p></td></tr><tr><td id="performance-and-implications" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Performance and Implications</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The initial results show that log-linear attention performs well on synthetic associative recall tasks (MQAR): it maintains near-perfect accuracy as sequence length scales, while linear baselines like DeltaNet degrade significantly.
In language modeling pretrained on 50B tokens, log-linear variants of Mamba-2 and Gated DeltaNet reduce perplexity and improve performance on 6–8 of 9 commonsense reasoning benchmarks. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/e5ed622e-ebd2-4140-9822-ce41c02b99ea/image.png?t=1749567803" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Additionally, log-linear Gated DeltaNet matches or exceeds its linear counterpart on real-world retrieval tasks (e.g., SQuAD and TriviaQA), even at 16K-token contexts. However, custom kernels are needed to optimize intra-chunk operations, and the hierarchical inductive bias may not suit all applications. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28yjf9KIXZdsXoh1WlHvvKnIfbmHjw5_gOnUr6URAs4qTKHy88wD3SOJtIurfhSIdBz_NQTFnELrzt51R0k-tMlV-yJjPr2RbOVlkn4MX45M/4h8/qG-VYipiQRaVgzjHnqTHzg/h12/h001.1mqXy0M-FQTCQFerkMmsXAFQ-4EP0qif1TWMrnIhCOI" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"> Read Full Paper </a></td></tr></table></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"></p></td></tr></table></td></tr></table></td></tr><tr><td id="beyond-the-8020-rule-high-entropy-m" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;">Beyond the 80/20 Rule: High-Entropy Minority Tokens Drive Effective Reinforcement Learning for LLM Reasoning </h2></td></tr><tr><td class="dd" align="left" style="padding:0px 
28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style=""><i>Wang et al. [QwenTeam, LeapLab]</i></span></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;"> ♥ 340 </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="background-color:#e0e0e0;"><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> LLM RLVR </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span></p></td></tr><tr><td id="high-entropy-tokens-in-ll-ms" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">High-Entropy Tokens in LLMs</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Reinforcement Learning with Verifiable Rewards (RLVR) has significantly advanced reasoning in large language models, but its inner workings are still unclear. Current methods update all tokens equally during training, overlooking their distinct roles in reasoning chains. This gap led researchers to investigate RLVR through token entropy patterns, which revealed that only a critical minority of tokens steer reasoning paths. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> This research proposes focusing updates on these high-entropy "forking tokens" to understand the RLVR process and unlock additional efficiency gains. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/b68b97b2-63b6-4a13-8c8e-985697ff1be9/teaser_png.png?t=1749567939" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td id="how-token-entropy-guides-reasoning" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">How Token Entropy Guides Reasoning</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> In Chain-of-Thought reasoning, tokens split into two functional groups. Roughly 80% are low-entropy tokens, which contain deterministic elements like word suffixes or code fragments that follow established paths. The remaining 20% are high-entropy "forking tokens," such as logical connectors ("however," "thus") or decision points ("assume," "define"). These introduce uncertainty, branching reasoning into multiple pathways. For instance, in a math problem, a token like "suppose" might pivot the solution between algebraic or geometric approaches. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> During RLVR training, models largely preserve the base model’s entropy distribution. Policy updates primarily adjust high-entropy tokens and subtly increase their exploratory potential. 
On the other hand, low-entropy tokens show minimal entropy fluctuation, acting as stable anchors. This selective adaptation suggests RLVR refines reasoning by optimizing forks instead of rewriting entire paths. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/5fc999bf-8b8e-46b5-9bf3-ff3221abceda/inference_with_varying_temp.png?t=1749567994" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>Average scores of AIME 2024 and AIME 2025.</p></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> After that, the method masks gradients for low-entropy tokens and updates only the top 20% high-entropy tokens during training. This takes advantage of the finding that forking tokens drive nearly all performance gains. By concentrating learning where uncertainty matters, the approach reduces computational overhead while amplifying exploration. 
</p></td></tr><tr><td id="scaling-gains-for-high-entropy-toke" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Scaling Gains for High-Entropy Tokens</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The initial benchmark results show striking efficiency: updating only 20% of tokens matches full-update performance on smaller models like Qwen3-8B. For larger models, it delivers substantial boosts: the scores for Qwen3-32B jumped +11.04 on AIME’25 and +7.71 on AIME’24, which sets a new state of the art. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/a43a101b-e562-402b-9ea0-e2e2f87a49d8/cot_entropy.png?t=1749567954" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>Entropy patterns in the chain of thoughts of LLMs.</p></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> On the other hand, training solely on low-entropy tokens caused a sharp decline in performance. Additionally, the performance gains scaled with model size, and the technique generalized to out-of-domain tasks like LiveCodeBench, which hints at broader applicability.
The token-entropy metric provides a new perspective on why RLVR outperforms supervised fine-tuning. It optimizes exploratory forks rather than memorizing paths. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/6c8506d1-1fc3-4d81-9136-caa20140c8ab/rlvr_performance_table.png?t=1749568041" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr><tr><td align="center" valign="top" class="t" style="width:600px; padding: 4px 0px 4px 0px;"><p>Comparison between vanilla DAPO using all tokens and DAPO using only the top 20% high-entropy tokens.</p></td></tr></table></td></tr><tr><td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28yjf9KIXZdsXoh1WlHvvKmgHp5RFKobKEhxl2leQ7QCn8qouppDMhxLfQvKShaTQPZOsEmI0wZ78p4oqxd_NuCe2bWXE-hLepvPdDUIFIxF/4h8/qG-VYipiQRaVgzjHnqTHzg/h13/h001.I_7nM2Hry6menTVNFJfylITL0CSgkDgpdkbAZmz8nqs" target="_blank" rel="noopener noreferrer nofollow" 
style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"> Read Full Paper </a></td></tr></table></td></tr><tr><td><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" style=""><tr><td bgcolor="#222222" style="background-color:#222222;padding:0.0px 0.0px 0.0px 0.0px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"></p></td></tr></table></td></tr></table></td></tr><tr><td id="how-much-do-language-models-memoriz" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;">How much do language models memorize? </h2></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style=""><i>Morris et al. 
[FAIR at Meta, Google DeepMind, Cornell University, NVIDIA]</i></span></p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"><span style="background-color:#e0e0e0;"><span style="color:rgb(255, 58, 58);font-size:0.6rem;"> ♥ 3.2k </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="background-color:#e0e0e0;"><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> LLM Interp </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span><span style="background-color:#e0e0e0;"><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> bycloud’s pick </span></span><span style="color:rgb(44, 129, 229);font-size:0.6rem;"> </span></p></td></tr><tr><td id="understanding-memorization-in-langu" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Understanding Memorization in Language Models</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Language models are getting incredibly powerful, but many people have the same question about them. Are these models really intelligent or are these models only memorizing answers? Existing definitions and approaches often fail to provide this answer as merely extracting a string from a model doesn’t prove memorization, and verbatim reproduction isn’t always necessary. This ambiguity makes it hard to measure what’s truly stored in a model’s parameters. This paper tackles this by redefining memorization through an information-theoretic lens, and offering a clearer way to quantify unintended memorization and generalization. 
</p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/03dac561-49d1-4351-bcc1-ba6c6aaf924e/image.png?t=1749568147" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td id="how-much-language-models-actually-m" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">How Much Language Models Actually Memorize</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The authors of this study propose using the Kolmogorov complexity, which measures the shortest description length of data. Here, memorization is defined as the reduction in bits needed to encode a data point when using the model as a reference. Unintended memorization captures sample-specific details stored beyond what’s expected from generalization. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> You can think of it as a model that has "memorized" a data point if that point can be represented more compactly when the model is used as a reference. Concretely, they measure unintended memorization in <span style="font-weight:700;"><b>bits</b></span>: the difference between the inherent information in a data point and its compressed size when leveraging the model. 
This separates unintended memorization (sample-specific details) from generalization (learned patterns). For example, a model might memorize an exact phone number (unintended) versus learning arithmetic to generate new ones (generalization). </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/3d366791-06ee-42b1-90b6-92104256adb0/image.png?t=1749568189" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> To validate this, the team first eliminated generalization variables using synthetic datasets of random bit strings. Here, every bit must be memorized since patterns don’t exist. They trained GPT-style transformers of varying sizes and precisions, finding that models store roughly 3.51 bits per parameter at bfloat16 precision and 3.83 at float32. Next, they applied the method to real text (from the deduplicated FineWeb dataset). Results showed unintended memorization peaks when dataset size approaches model capacity. Beyond this point, double descent occurs: test loss drops sharply as models shift from memorizing samples to generalizing patterns.
</p></td></tr><tr><td id="capacity-limits-double-descent-and-" class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:normal;padding:0px 28px;text-align:left;"><h3 style="color:#2A2A2A;font-weight:normal;mso-line-height-alt:125.0%;"><span style="color:rgb(67, 67, 67);">Capacity Limits, Double Descent, and Membership Inference in LLMs</span></h3></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> The results of this study reveal clear patterns. On synthetic data, models plateau in memorization once dataset size exceeds their capacity (roughly 3.5 bits per parameter). For real text, double descent emerges precisely when dataset size surpasses this capacity: test loss spikes as models shift from memorizing samples to learning general patterns. This transition forces models to share information across data points, enabling generalization. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; " class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 auto;"><tr><td align="center" valign="top" style="width:600px;"><img src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/uploads/asset/file/a0cd05c5-4c03-4b99-b795-06b04f8bfad9/image.png?t=1749568248" alt="" height="auto" width="600" style="display:block;width:100%;" border="0"/></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Membership inference attacks (predicting if a sample was in the training data) follow a scaling law based on the capacity-to-dataset ratio. Success drops predictably as datasets grow, nearing random guessing (F1 ~0.5) for large datasets like those in modern LLMs. 
Validation on GPT-2-scale models confirms this: at high data-to-capacity ratios, attacks fail. The authors predict most large models are trained on too much data for reliable membership inference. </p></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> In the future, quantifying unintended memorization with this approach may help us design architectures that generalize more efficiently without over-retaining data. </p></td></tr><tr><td align="center" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:center;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida Sans Unicode',sans-serif;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.fUNb4GdFo9D3F8WuLArtoV5sElgytBlvJRzI9WtI92YniLp0ogFXH9JkYUqyJruLSdapE7GfVRgegdIPxya3R3D3UDWXG8mRfXljwgpq4QSY4He0J_QXAWjb4X8pji4J/4h8/qG-VYipiQRaVgzjHnqTHzg/h14/h001.JmtyaD0u6TWq-EYaOQOrILTqhowct8xkbH945HzgIao" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"> Read Full Paper </a></td></tr></table></td></tr><tr><td align="center" valign="top"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="center" valign="top" style="font-size:0px;line-height:0px;padding:30px 0px 30px;" class="dd"><table class="j" role="none" width="50%" border="0" cellspacing="0" cellpadding="0" 
align="center"><tr><td> </td></tr></table></td></tr><tr><td class="dd" align="left" valign="top" style="color:#2A2A2A;font-weight:Bold;padding:0px 28px;text-align:left;"><h2 style="color:#2A2A2A;font-weight:Bold;mso-line-height-alt:150.0%;">Share The AI Timeline</h2></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> You currently have <strong>0</strong> referrals. </p></td></tr><tr><td align="left" valign="top" style="padding-bottom:20px;padding-left:28px;padding-right:28px;padding-top:20px; display:none;width:0px;max-height:0px;overflow:hidden;mso-hide:all;height:0;font-size:0;max-height:0;line-height:0;margin:0 auto;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" style="margin:0 auto 0 0;"><tr><td align="center" valign="top" style="width:300px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxWc4htTObwdorovK0nFHVH-4pUdVE0ELYH5DsNemk732SjNwhPNJ25r0O8B5vYifsGNUqyW5TiZkyMsF1yreu0byy2KW36J1wDdpoLuXg2TU1F1OW8OHoHaU4-ZmrZpPU4RN-crQCEimD190CSn9fPvQfHEYx7CTSXIidbla7LLK/4h8/qG-VYipiQRaVgzjHnqTHzg/h15/h001.JZiY5L4G1fWXKuFL3a81voOgtOwJyaPtd2wLzQ9Jqtc" rel="noopener noreferrer nofollow" style="text-decoration:none;" target="_blank"><img src="" alt="" height="auto" width="300" style="display:block;width:100%;" border="0"/></a></td></tr></table></td></tr><tr><td align="left" valign="top" style="padding-bottom:14px;padding-left:28px;padding-right:28px;padding-top:14px;text-align:left;width:100%;word-break:break-word;" class="dd"><table role="none" border="0" cellspacing="0" cellpadding="0" align="left" style="margin:14px auto 14px auto;"><tr><td align="center" valign="middle" height="44.75" style="height:44.75px;background-color:#2C81E5;border-color:#DFD150;border-radius:10px 10px 10px 10px;border-style:solid;border-width:0px 0px 0px 0px;color:#FFFFFF;font-family:'Open Sans','Segoe UI','Apple SD Gothic Neo','Lucida Grande','Lucida 
Sans Unicode',sans-serif;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxWc4htTObwdorovK0nFHVH-4pUdVE0ELYH5DsNemk732SjNwhPNJ25r0O8B5vYifsGNUqyW5TiZkyMsF1yreu0byy2KW36J1wDdpoLuXg2TU1F1OW8OHoHaU4-ZmrZpPU4RN-crQCEimD190CSn9fPvQfHEYx7CTSXIidbla7LLK/4h8/qG-VYipiQRaVgzjHnqTHzg/h16/h001.fq42QDfmJ5KkosjYt15ASMchCIK82Tnb-YGI7nHW9bc" target="_blank" rel="noopener noreferrer nofollow" style="color:#FFFFFF;display:block;font-size:16px;font-size:16px;font-weight:normal;padding:0px 14px;padding:14px 14px 14px 14px;text-decoration:none;"> Click to Share </a></td></tr></table></td></tr><tr><td class="dd" align="left" style="padding:0px 28px;text-align:left;word-break:break-word;"><p style="mso-line-height-alt:150.0%;"> Or copy this link and share it with others: <a class="link" href="https://mail.bycloud.ai/subscribe?ref=6SqUHb8KiF&_bhlid=bf7a73b936aab597b0df9777ef50b28c5a049d32" target="_blank" rel="noopener noreferrer nofollow" clicktracking="off"><span>https://mail.bycloud.ai/subscribe?ref=6SqUHb8KiF</span></a></p></td></tr><tr><td align="center" valign="top" style="font-size:0px;line-height:0px;padding:30px 0px 30px;" class="dd"><table class="j" role="none" width="50%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td> </td></tr></table></td></tr></table></td></tr><tr><td class="dd" style="padding: 20px;"><table width="100%" cellpadding="0" cellspacing="0" role="none" style="max-width:520px;margin:0 auto;"><tr><td class="q" style="padding:16px 16px 6px 16px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.tLfGW26lAwaS9gFg17HSoDDFT6eh5Nsg0xYVQj-h6I3o9m2k79_qw4izMYhmcI36ix9y6oBsLo5HTewVeo9WG1hb_Xdv5hfq5n1dapdIw1tQkuGFYtdVZqb0s1C4YYgYXN3adklrSrNBowxNDQuE72Kr8hInmNOB9bZ6xqLGBFI/4h8/qG-VYipiQRaVgzjHnqTHzg/h17/h001.1swGr7X_wBGaZS1YPEdcFxbeKpvUBPNro5N6WaLNbu8" style="text-decoration:none !important;"><table width="100%" cellpadding="0" cellspacing="0" border="0" role="none"><tr><td width="100%" style="padding: 0 0 14px 
0;text-decoration:none;width:100%;"><table width="100%" cellpadding="0" cellspacing="0" border="0" role="none"><tr><td width="36" style="width:36px;"><img src="https://pbs.twimg.com/profile_images/1698572487909400576/BvncwnrP_normal.jpg" alt="tw profile: The AI Timeline" style="display:block;width:36px;height:36px;border-radius:50%;border:0;"/></td><td width="400" style="padding:0 0 0 8px;text-decoration:none;"><span style="display:block;font-size:14px;color:#1c2022;font-weight:700;"> The AI Timeline </span><span style="display:block;color:#697882;font-size:14px;"> @TheAITimeline </span></td><td width="24" align="right" style="vertical-align:text-top;"><img width="24" height="24" loading="lazy" alt="tw" style="border:0;" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/x_logo.png"/></td></tr></table></td></tr><tr></tr><tr><td style="word-break:break-word;"><p>🚨This week's top AI/ML research papers:</p><p>- Log-Linear Attention <br>- Beyond the 80/20 Rule <br>- Why Gradients Rapidly Increase Near the End of Training <br>- How much do language models memorize? 
<br>- General agents need world models <br>- The Illusion of Thinking <br>- MiMo-VL Technical Report <br>-</p></td></tr><tr><td style="padding:12px 0 0 0;"></td></tr><tr><td align="center" style="padding:8px 0 0 0;width:480px;"><img src="https://pbs.twimg.com/media/Gs91Mo0WUAAdgw2.jpg" alt="" width="480" height="auto" style="display:block;border:1px solid #E1E8ED;border-radius:5px;width:100%;max-width:480px;height:auto;"/></td></tr><tr><td height="8" style="line-height:1px;font-size:1px;height:8px;"> </td></tr><tr><td align="left" valign="top" class="s"><p>2:01 AM • Jun 9, 2025</p></td></tr><tr><td height="10" style="line-height: 1px; font-size: 1px; height: 10px;"> </td></tr><tr><td height="1" bgcolor="#e1e8ed" style="line-height:0px;font-size:0px;height:1px;"></td></tr><tr><td height="10" style="line-height:1px;font-size:1px;height:10px;"> </td></tr><tr><td align="left" valign="top" class="s"><p><b style="color:#1C2022">2.05K</b> Likes <b style="color:#1C2022">147</b> Retweets </p></td></tr><tr><td align="left" valign="top" class="s"><div align="center" style="text-align:center;margin-top:4px;margin-bottom:4px;padding:8px;border:1px solid #ccd6dd;border-radius:9999px;color:#1B95E0"><b>10 Replies</b></div></td></tr></table></a></td></tr></table></td></tr></table></td></tr></table></td></tr><tr><td align="center" valign="top"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td><tr><td class="b" align="center" valign="top" bgcolor="#2a2a2a" style="padding:0px 0px 0px 0px;border-style:solid;border-width: 0px 0px 0px 0px;border-color: #2a2a2a;border-bottom-left-radius:10px;border-bottom-right-radius:10px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="center" valign="top" bgcolor="#73ddff" style="padding:12px"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td><span style="padding-left:1px;"></span></td><td align="center" 
valign="middle" width="75" style="width:75px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.1muhFWIqieRYpaJ-FbWSCQqcWoV4NNHHr5SkP9THApWuHAAlWLQxI3Q_IqFmt_DcyAxeC8jDApCnHmMSBGpBb5sgtimvBYgxRX-Rp7s0F3LjCHoSwdhr83OBqRFhJ1y_/4h8/qG-VYipiQRaVgzjHnqTHzg/h18/h001.9pj8Fvr6hnvrO2ihf35XOxBe8MmZrZ_jCdEr314Le9I" style="text-decoration:none;"><img width="22" height="22" alt="tw" border="0" style="display:block;max-width:22px;color:Dark" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/x_dark.png"/></a></td><td align="center" valign="middle" width="75" style="width:75px;"><a href="https://elink4f7.mail.bycloud.ai/ss/c/u001.amatuKKICSickUKplYJXmBoQnQ9VXnB2zTxBG4HeHBgjMqVxpoXRdj01cjwyoVlHgiebEOgBvwHtevoVpsSvpn3Q1di2ml6sb3cBM-X6IStQbj_zQSVGWJ8AAmPw2en2/4h8/qG-VYipiQRaVgzjHnqTHzg/h19/h001.jqalMqsAQ_0wmlzKQVg4z2KhG6tXFoU1StaJij_aAG8" style="text-decoration:none;"><img width="22" height="16" alt="yt" border="0" style="display:block;max-width:22px;color:Dark" src="https://media.beehiiv.com/cdn-cgi/image/fit=scale-down,format=auto,onerror=redirect,quality=80/static_assets/youtube_dark.png"/></a></td><td><span style="padding-left:1px;"></span></td></tr></table></td></tr><tr><td height="10" style="line-height:1px;font-size:1px;height:10px;"> </td></tr><tr><td class="w" align="center" valign="top" style="padding:15px 15px 15px 15px;"><table role="none" width="100%" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td align="center" valign="top"><p style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"> Update your email preferences or unsubscribe <a class="link" 
href="https://elink4f7.mail.bycloud.ai/ss/c/u001.c6q0w4g5sodbtO4I1B_pxWc4htTObwdorovK0nFHVH-4pUdVE0ELYH5DsNemk732SjNwhPNJ25r0O8B5vYifsBhEpz-DJgyVFmavJPa0OyKRRnvw4o7XGyvIv7PRofnmiRtHwDvwM9TPWjMJFBcOIwVtOsWFY1uxbWgXeiW1bNPPzCIru-jM2BDni4PGSdFfvCVPclQ7N8WlijqHJZakDaKPaW-cJ7_zVNKS8AtdYapbR9pJYukalBBqaU09YhpBLRsO0kfi9KuO2OAWpqWjw6N5TdDueIdXTEitm2tweSHf_DC7fbVQ1V4ZsE60ihob7Pe-G-0ppixhcwz1GLw3QA1rlT-LifmYfWIn0hKUhaB8RGY1y-yEbuSmUTjNJaOgGqZCd4klRtRm3ikuZFCJaHH08Va_D7z1Plir_wzPlgM/4h8/qG-VYipiQRaVgzjHnqTHzg/h20/h001.saBp-N9v1VWuWAtZZ-3xzNiOg91ea6eBEFM51kE6Hk4" style="text-decoration:underline;text-decoration-color:#FFFFFF!important;color:#FFFFFF!important;"> here</a></p><p class="copyright" style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"> © 2025 bycloudai </p><p style="font-family:'Verdana',Geneva,sans-serif;color:#FFFFFF!important;"> 228 Park Ave S, #29976, New York, New York 10003, United States </p></td></tr><tr style="display: table-row !important;"><td align="center" valign="top" style="padding-top:20px;display:table-cell !important;"><table role="none" border="0" cellspacing="0" cellpadding="0" align="center" style="display:table !important;"><tr style="display:table-row !important;"><td class="u" align="center" valign="middle" height="32" style="height:32px;display:table-cell !important; max-height: 32px !important;margin:0px !important; background-color: #ffffff !important;"><a style="line-height:32px !important;text-decoration:none;display:block !important;" href="https://elink4f7.mail.bycloud.ai/ss/c/u001.DUiN96-Eq7pUHzwEhy5j28olDWFpV5DDKfdk_OdOKOiH0qdVkXtVl7q-5Fy5ywMFcT5AbxXrXtwRzzz-K0iEjERgBoRsKODzOzC-HP6KffvMj9QD99-7ErGBaYDZSnONbnyGr-aJfTDorf0zkj9PFDlDu7kocbPA87OVpQbrcg_FiiRua8N3F10p-X12gN3VhJKKW-_4NvRlMOmJkSOhzb1KAzcD-lp767gwdtLRJvQbRv0q_-3UXnqjfzHq61Hv/4h8/qG-VYipiQRaVgzjHnqTHzg/h21/h001.SDEPQwckWGtx2wc4Ih6Dh-AdXTuArB6EKLc5X1LuPww"><img src="https://media.beehiiv.com/output-onlinepngtools.png" width="16" alt="beehiiv logo" 
style="display:inline-block !important;max-width:16px !important; vertical-align:-3px !important;width: 16px !important;" border="0"/><span style="padding-left:11px !important;display: inline-block !important;">Powered by beehiiv</span></a></td></tr></table></td></tr><tr><td align="left" valign="top" height="2" style="height:2px;"><a href='https://elink4f7.mail.bycloud.ai/ss/c/u001.CxDkkVpJsBdVoe83c_tBWsHIaP4XNp0WgUYqLvHcKk_3uqk_KIkz4ddLinhFbud6JuxLFdSUhYnR7b1NSsmbtzXNGNblnEEMKUtkCAjkn8Y/4h8/qG-VYipiQRaVgzjHnqTHzg/h22/h001.jH0p4qiSD2IVEygcjqcNboXyOJuGlAYkcz3YoyPhlzg' style="color: #2a2a2a !important; cursor: default; font-size: 1px; text-decoration: none;"> Terms of Service </a></td></tr></table></td></tr></table></td></tr></td></tr></table></td></tr></table></td></tr></table></td></tr></table></div></body></html>