<!--
stable-diffusion-aws-extension/en/user-guide/webUI/endpoint-autoscaling/index.html

1567 lines
44 KiB
HTML
-->

<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<link rel="canonical" href="https://awslabs.github.io/stable-diffusion-aws-extension/user-guide/webUI/endpoint-autoscaling/">
<link rel="icon" href="https://s3.cn-north-1.amazonaws.com.cn/aws-assets-prod/libra-css/images/site/fav/favicon.ico">
<meta name="generator" content="mkdocs-1.6.0, mkdocs-material-9.5.30">
<title>SageMaker Async Endpoint Autoscaling - Extension for Stable Diffusion on AWS</title>
<link rel="stylesheet" href="../../../assets/stylesheets/main.3cba04c6.min.css">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<script>
  /* Namespaced storage helpers used by the theme's inline scripts. */

  // Site root, resolved three levels above the current page URL.
  __md_scope = new URL("../../..", location);

  // Folds a string into a number, one character at a time.
  __md_hash = text => [...text].reduce((acc, ch) => (acc << 5) - acc + ch.charCodeAt(0), 0);

  // Reads the entry stored under "<scope pathname>.<key>" and JSON-decodes it.
  __md_get = (key, storage = localStorage, scope = __md_scope) => {
    const raw = storage.getItem(scope.pathname + "." + key);
    return JSON.parse(raw);
  };

  // JSON-encodes a value and stores it under "<scope pathname>.<key>".
  // Any error thrown while storing is deliberately swallowed.
  __md_set = (key, value, storage = localStorage, scope = __md_scope) => {
    try {
      storage.setItem(scope.pathname + "." + key, JSON.stringify(value));
    } catch (err) {}
  };
</script>
</head>
<body dir="ltr">
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
<a href="#sagemaker-async-endpoint-autoscaling" class="md-skip">
Skip to content
</a>
</div>
<div data-md-component="announce">
</div>
<div data-md-color-scheme="default" data-md-component="outdated" hidden>
</div>
<header class="md-header" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="../../.." title="Extension for Stable Diffusion on AWS" class="md-header__button md-logo" aria-label="Extension for Stable Diffusion on AWS" data-md-component="logo">
<img src="https://s3.cn-north-1.amazonaws.com.cn/aws-assets-prod/libra-css/images/site/fav/favicon.ico" alt="logo">
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3V6m0 5h18v2H3v-2m0 5h18v2H3v-2Z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
Extension for Stable Diffusion on AWS
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
SageMaker Async Endpoint Autoscaling
</span>
</div>
</div>
</div>
<script>
  // Re-applies the color palette saved under "__palette" (read through
  // __md_get, defined by the inline script in <head>) by copying each
  // palette.color entry onto <body> as a data-md-color-* attribute.
  var media, input, key, value, palette = __md_get("__palette");
  if (palette && palette.color) {
    // A stored media of "(prefers-color-scheme)" means "follow the OS setting":
    // resolve it to the light or dark toggle input and take that input's values.
    if ("(prefers-color-scheme)" === palette.color.media) {
      media = matchMedia("(prefers-color-scheme: light)");
      input = document.querySelector(
        media.matches
          ? "[data-md-color-media='(prefers-color-scheme: light)']"
          : "[data-md-color-media='(prefers-color-scheme: dark)']"
      );
      // querySelector returns null when the page has no matching toggle input;
      // in that case keep the stored values instead of throwing a TypeError.
      if (input) {
        palette.color.media = input.getAttribute("data-md-color-media");
        palette.color.scheme = input.getAttribute("data-md-color-scheme");
        palette.color.primary = input.getAttribute("data-md-color-primary");
        palette.color.accent = input.getAttribute("data-md-color-accent");
      }
    }
    for ([key, value] of Object.entries(palette.color)) {
      document.body.setAttribute("data-md-color-" + key, value);
    }
  }
</script>
<div class="md-header__option">
<div class="md-select">
<button class="md-header__button md-icon" aria-label="Select language">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12.87 15.07-2.54-2.51.03-.03A17.52 17.52 0 0 0 14.07 6H17V4h-7V2H8v2H1v2h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11.76-2.04M18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2l-4.5-12m-2.62 7 1.62-4.33L19.12 17h-3.24Z"/></svg>
</button>
<div class="md-select__inner">
<ul class="md-select__list">
<li class="md-select__item">
<a href="/stable-diffusion-aws-extension/en/" hreflang="en" class="md-select__link">
English
</a>
</li>
<li class="md-select__item">
<a href="/stable-diffusion-aws-extension/zh/" hreflang="zh" class="md-select__link">
简体中文
</a>
</li>
<li class="md-select__item">
<a href="/stable-diffusion-aws-extension/ja/" hreflang="ja" class="md-select__link">
日本語
</a>
</li>
</ul>
</div>
</div>
</div>
<label class="md-header__button md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
</label>
<div class="md-search" data-md-component="search" role="dialog">
<label class="md-search__overlay" for="__search"></label>
<div class="md-search__inner" role="search">
<form class="md-search__form" name="search">
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
<label class="md-search__icon md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
</label>
<nav class="md-search__options" aria-label="Search">
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41Z"/></svg>
</button>
</nav>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
<div class="md-search-result" data-md-component="search-result">
<div class="md-search-result__meta">
Initializing search
</div>
<ol class="md-search-result__list" role="presentation"></ol>
</div>
</div>
</div>
</div>
</div>
<div class="md-header__source">
<a href="https://github.com/awslabs/stable-diffusion-aws-extension" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg>
</div>
<div class="md-source__repository">
awslabs/stable-diffusion-aws-extension
</div>
</a>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
<div class="md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item">
<a href="../../.." class="md-tabs__link">
Implementation Guide
</a>
</li>
</ul>
</div>
</nav>
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../../.." title="Extension for Stable Diffusion on AWS" class="md-nav__button md-logo" aria-label="Extension for Stable Diffusion on AWS" data-md-component="logo">
<img src="https://s3.cn-north-1.amazonaws.com.cn/aws-assets-prod/libra-css/images/site/fav/favicon.ico" alt="logo">
</a>
Extension for Stable Diffusion on AWS
</label>
<div class="md-nav__source">
<a href="https://github.com/awslabs/stable-diffusion-aws-extension" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg>
</div>
<div class="md-source__repository">
awslabs/stable-diffusion-aws-extension
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1" >
<label class="md-nav__link" for="__nav_1" id="__nav_1_label" tabindex="0">
<span class="md-ellipsis">
Implementation Guide
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_1">
<span class="md-nav__icon md-icon"></span>
Implementation Guide
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1_1" >
<label class="md-nav__link" for="__nav_1_1" id="__nav_1_1_label" tabindex="0">
<span class="md-ellipsis">
Solution Overview
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_1_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_1_1">
<span class="md-nav__icon md-icon"></span>
Solution Overview
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../.." class="md-nav__link">
<span class="md-ellipsis">
Overview
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../solution-overview/features-and-benefits/" class="md-nav__link">
<span class="md-ellipsis">
Features and benefits
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../../cost/" class="md-nav__link">
<span class="md-ellipsis">
Cost
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1_3" >
<label class="md-nav__link" for="__nav_1_3" id="__nav_1_3_label" tabindex="0">
<span class="md-ellipsis">
Architecture Overview
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_1_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_1_3">
<span class="md-nav__icon md-icon"></span>
Architecture Overview
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../architecture-overview/architecture/" class="md-nav__link">
<span class="md-ellipsis">
Architecture diagram
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../architecture-overview/architecture-details/" class="md-nav__link">
<span class="md-ellipsis">
Architecture details
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1_4" >
<label class="md-nav__link" for="__nav_1_4" id="__nav_1_4_label" tabindex="0">
<span class="md-ellipsis">
Plan your deployment
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_1_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_1_4">
<span class="md-nav__icon md-icon"></span>
Plan your deployment
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../plan-deployment/security/" class="md-nav__link">
<span class="md-ellipsis">
Security
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../plan-deployment/regions/" class="md-nav__link">
<span class="md-ellipsis">
Supported regions
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../plan-deployment/quotas/" class="md-nav__link">
<span class="md-ellipsis">
Quotas
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1_5" >
<label class="md-nav__link" for="__nav_1_5" id="__nav_1_5_label" tabindex="0">
<span class="md-ellipsis">
Deploy the solution
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_1_5_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_1_5">
<span class="md-nav__icon md-icon"></span>
Deploy the solution
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../deployment/permissions/" class="md-nav__link">
<span class="md-ellipsis">
Permissions
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../deployment/template/" class="md-nav__link">
<span class="md-ellipsis">
AWS CloudFormation template
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../deployment/deployment/" class="md-nav__link">
<span class="md-ellipsis">
Deploy for SD webUI
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../deployment/deployment_comfyui/" class="md-nav__link">
<span class="md-ellipsis">
Deploy for ComfyUI
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../deployment/deployment_for_existing_users/" class="md-nav__link">
<span class="md-ellipsis">
Update SD webUI
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../../uninstall/" class="md-nav__link">
<span class="md-ellipsis">
Uninstall the solution
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1_7" >
<label class="md-nav__link" for="__nav_1_7" id="__nav_1_7_label" tabindex="0">
<span class="md-ellipsis">
Use the solution
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_1_7_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_1_7">
<span class="md-nav__icon md-icon"></span>
Use the solution
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../training-guide/" class="md-nav__link">
<span class="md-ellipsis">
Kohya Training guide
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1_7_2" >
<label class="md-nav__link" for="__nav_1_7_2" id="__nav_1_7_2_label" tabindex="0">
<span class="md-ellipsis">
SD webUI guide
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="3" aria-labelledby="__nav_1_7_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_1_7_2">
<span class="md-nav__icon md-icon"></span>
SD webUI guide
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../multi-user/" class="md-nav__link">
<span class="md-ellipsis">
Configure API and Users Management
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../CloudAssetsManage/" class="md-nav__link">
<span class="md-ellipsis">
Cloud Assets Management
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../txt2img-guide/" class="md-nav__link">
<span class="md-ellipsis">
txt2img guide
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../img2img-guide/" class="md-nav__link">
<span class="md-ellipsis">
img2img guide
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../controlnet-guide/" class="md-nav__link">
<span class="md-ellipsis">
controlNet guide
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../extensions-guide/" class="md-nav__link">
<span class="md-ellipsis">
Other Extensions guide
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../ComfyUI/inference/" class="md-nav__link">
<span class="md-ellipsis">
ComfyUI guide
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1_8" >
<label class="md-nav__link" for="__nav_1_8" id="__nav_1_8_label" tabindex="0">
<span class="md-ellipsis">
Developer guide
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_1_8_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_1_8">
<span class="md-nav__icon md-icon"></span>
Developer guide
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../developer-guide/source/" class="md-nav__link">
<span class="md-ellipsis">
Source code
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../developer-guide/access/" class="md-nav__link">
<span class="md-ellipsis">
API Access Restriction
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../developer-guide/api_upload_ckpt/" class="md-nav__link">
<span class="md-ellipsis">
API Upload Checkpoint Process
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../developer-guide/api_inference_process/" class="md-nav__link">
<span class="md-ellipsis">
API Inference Process
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../developer-guide/api_debugger/" class="md-nav__link">
<span class="md-ellipsis">
API Inference Debugger
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../developer-guide/api_authentication/" class="md-nav__link">
<span class="md-ellipsis">
API Authentication
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../developer-guide/api/" class="md-nav__link">
<span class="md-ellipsis">
API Details
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../developer-guide/byoc/" class="md-nav__link">
<span class="md-ellipsis">
Custom Container
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../../../troubleshooting/" class="md-nav__link">
<span class="md-ellipsis">
Troubleshooting
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../faq/" class="md-nav__link">
<span class="md-ellipsis">
FAQ
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../revisions/" class="md-nav__link">
<span class="md-ellipsis">
Revisions
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../../notices/" class="md-nav__link">
<span class="md-ellipsis">
Notices
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#overview" class="md-nav__link">
<span class="md-ellipsis">
Overview
</span>
</a>
<nav class="md-nav" aria-label="Overview">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#example-of-sagemaker-async-endpoint-autoscaling-policy-below" class="md-nav__link">
<span class="md-ellipsis">
Example of SageMaker async endpoint autoscaling policy below:
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<h1 id="sagemaker-async-endpoint-autoscaling">SageMaker Async Endpoint Autoscaling</h1>
<p>Amazon SageMaker provides capabilities to automatically scale model inference endpoints in response to changes in traffic patterns. This document explains how autoscaling is enabled for an Amazon SageMaker async endpoint created by this solution.</p>
<h2 id="overview">Overview</h2>
<p>The solution provided enables autoscaling for a specific endpoint and variant in Amazon SageMaker. Autoscaling is managed through two scaling policies:</p>
<ol>
<li><strong>Target Tracking Scaling Policy</strong>: This policy adjusts the desired instance count based on the <code>CPUUtilization</code> metric. It aims to keep the CPU utilization at 50%. If the average CPU utilization stays above 50% for 5 minutes, the alarm triggers Application Auto Scaling to scale out the SageMaker endpoint until it reaches the maximum number of instances.</li>
</ol>
<p>The scaling policy based on CPU utilization is defined using the <code>put_scaling_policy</code> method. It specifies the following parameters:</p>
<ul>
<li><code>TargetValue</code>: 50% CPU utilization</li>
<li><code>ScaleInCooldown</code>: 300 seconds</li>
<li><code>ScaleOutCooldown</code>: 300 seconds</li>
</ul>
<ol start="2">
<li><strong>Step Scaling Policy</strong>: This policy allows you to define steps for scaling adjustments based on the <code>HasBacklogWithoutCapacity</code> metric. It lets Application Auto Scaling increase the instance count from 0 to 1 when there are pending inference requests but the endpoint has 0 instances.</li>
</ol>
<p>The step scaling policy is defined to adjust the capacity based on the <code>HasBacklogWithoutCapacity</code> metric. It includes:</p>
<ul>
<li><code>AdjustmentType</code>: ChangeInCapacity</li>
<li><code>MetricAggregationType</code>: Average</li>
<li><code>Cooldown</code>: 300 seconds</li>
<li><code>StepAdjustments</code>: Specifies the scaling adjustments based on the size of the alarm breach.</li>
</ul>
<h3 id="example-of-sagemaker-async-endpoint-autoscaling-policy-below">Example of SageMaker async endpoint autoscaling policy below:</h3>
<div class="highlight"><pre><span></span><code><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;ScalingPolicies&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
<span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;PolicyARN&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;Your PolicyARN&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;PolicyName&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;HasBacklogWithoutCapacity-ScalingPolicy&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;ServiceNamespace&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;sagemaker&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;ResourceId&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;endpoint/esd-type-c356f91/variant/prod&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;ScalableDimension&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;sagemaker:variant:DesiredInstanceCount&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;PolicyType&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;StepScaling&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;StepScalingPolicyConfiguration&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;AdjustmentType&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;ChangeInCapacity&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;StepAdjustments&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
<span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;MetricIntervalLowerBound&quot;</span><span class="p">:</span><span class="w"> </span><span class="mf">0.0</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;ScalingAdjustment&quot;</span><span class="p">:</span><span class="w"> </span><span class="mi">1</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">],</span>
<span class="w"> </span><span class="nt">&quot;Cooldown&quot;</span><span class="p">:</span><span class="w"> </span><span class="mi">300</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;MetricAggregationType&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;Average&quot;</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="nt">&quot;Alarms&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
<span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;AlarmName&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;stable-diffusion-hasbacklogwithoutcapacity-alarm&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;AlarmARN&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;Your AlarmARN&quot;</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">],</span>
<span class="w"> </span><span class="nt">&quot;CreationTime&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;2023-08-14T13:53:10.480000+08:00&quot;</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;PolicyARN&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;Your PolicyARN&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;PolicyName&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;CPUUtil-ScalingPolicy&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;ServiceNamespace&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;sagemaker&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;ResourceId&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;endpoint/esd-type-c356f91/variant/prod&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;ScalableDimension&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;sagemaker:variant:DesiredInstanceCount&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;PolicyType&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;TargetTrackingScaling&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;TargetTrackingScalingPolicyConfiguration&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;TargetValue&quot;</span><span class="p">:</span><span class="w"> </span><span class="mf">50.0</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;CustomizedMetricSpecification&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;MetricName&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;CPUUtilization&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;Namespace&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;/aws/sagemaker/Endpoints&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;Dimensions&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
<span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;Name&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;EndpointName&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;Value&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;esd-type-c356f91&quot;</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;Name&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;VariantName&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;Value&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;prod&quot;</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">],</span>
<span class="w"> </span><span class="nt">&quot;Statistic&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;Average&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;Unit&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;Percent&quot;</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="nt">&quot;ScaleOutCooldown&quot;</span><span class="p">:</span><span class="w"> </span><span class="mi">300</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;ScaleInCooldown&quot;</span><span class="p">:</span><span class="w"> </span><span class="mi">300</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="nt">&quot;Alarms&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
<span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;AlarmName&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;TargetTracking-endpoint/esd-type-c356f91/variant/prod-AlarmHigh-c915b303-9048-40b2-99a7-f5b7e49ab7c4&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;AlarmARN&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;Your AlarmARN&quot;</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;AlarmName&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;TargetTracking-endpoint/esd-type-c356f91/variant/prod-AlarmLow-2fd61f99-c2e5-4ac6-9722-54030c3f0216&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;AlarmARN&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;Your AlarmARN&quot;</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">],</span>
<span class="w"> </span><span class="nt">&quot;CreationTime&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;2023-08-14T13:53:10.182000+08:00&quot;</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">]</span>
<span class="p">}</span>
</code></pre></div>
</article>
</div>
<script>
  // Look up the element whose id matches the URL fragment. If it exists and
  // has a non-empty `name`, set its `checked` state to whether that name
  // starts with "__tabbed_".
  var target = document.getElementById(location.hash.slice(1));
  if (target && target.name) {
    target.checked = target.name.startsWith("__tabbed_");
  }
</script>
</div>
</main>
<footer class="md-footer">
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"base": "../../..", "features": ["navigation.tabs"], "search": "../../../assets/javascripts/workers/search.b8dbb3d2.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": "develop"}</script>
<script src="../../../assets/javascripts/bundle.fe8b6f2b.min.js"></script>
</body>
</html>