paper-KeepItSimple/sections/design.tex

% -*- root: ../main.tex -*-
%!TEX root=../main.tex


% %\begin{figure*}
% %\begin{tabular}{l|l|l|l}
% %Pending Usage & What to Do & Benefit & Example \\ \hline
% %Background task & Set to 70\% & Better energy & system\_service; Instagram service \\
% %Fixed compute; interactive & Set to 100\% & Better latency (sacrifice energy) & app cold start; installation \\
% %Block on user (screen) interaction & Set to 50-70\% & Better energy; minimal cost (jank) & FB app interaction \\
% %Block on intermittent I/O & Set to 70\% & Better latency *and* energy & PocketData microload \\
% %Block on memory & Set to <50\% & Better energy & Sparse bubblesort \\
% %Block on predictable GPS & TBD & TBD & TBD \\
% %\end{tabular}
% %\bfcaption{Governor decision logic \fixme{REFORMAT NO V-RULES}}
% %\label{decision_tree}
% %\end{figure*}

% \begin{figure}
% \begin{tabular}{l|l|l|l|l|l}
% Priority	& Interactivity		& What to Do	& Benefit	& Cost		& Example \\ \hline
% performance	&			& set to 100\%	& optimize UI	& energy	& app coldstarts, installs \\
% energy		& screen on / streaming	& set to 70\%*	& save energy,	& UI (possible	& FB app interaction \\
% 		&			&		& boost UI	& screendrops)	& \\
% energy		& not being used	& set to 70\&*	& save energy	& none		& system\_service, fluffychat \\
% prevent stalls	& (either)		& set to <70\%*	& save energy	& none		& (\memory workloads) \\
% \end{tabular}
% \bfcaption{Governor decision choices (*HW dependent) \fixme{Redundant??}}
% \label{decision_tree}
% \end{figure}

% \begin{figure}
% \centering
% \includegraphics[width=.95\linewidth]{figures/simple_algorithm.pdf}
% \bfcaption{Governor decision logic}
% \label{fig:simple_algorithm}
% \end{figure}

% \begin{algorithm}
% \caption{CPU Speed Selection \fixme{pull unimplemented}}
% \label{alg:cpu_speed_selection}
% \begin{algorithmic}[1]
% \Procedure{CPU Speed Selection}{}

% \If{$ runqueue = empty $}
% \State $ speed \gets 0 $
% \ElsIf{$ \exists cache\_bound\_ hint $}
% \State $ speed \gets 50 $
% \ElsIf{$ screenon = False $}
% \State $ speed \gets $ $\fenergy$
% \ElsIf{$ \exists performance\_hint $}
% \State $ speed \gets $ $\fperf$
% \Else
% \State $ speed \gets $ $\fenergy$
% \EndIf

% \EndProcedure
% \end{algorithmic}
% \end{algorithm}

% Some system of changing CPU speed is necessary to achieve the base goals of furnishing performance when needed and conserving energy when not needed.
% Instead of a system that that dwells on recent past CPU usage to make frequent adjustments to CPU speed, is there an alternative?
% Our experiments suggest a less complex approach:  Run the CPU at a preset energy-conservant midspeed that most of the time, and rely on hints from userspace for exceptions to the norm, when a different priority and speed should apply.


% \subsection{Why phones invite a simpler approach}

% Our system design partly reflects the nature of our use platform:  mobile devices.
% %Phones have limited resources and specific use cases.
% A general purpose system, including a general-case CPU policy, is inappropriate, as it misses optimizations that stem from behavior patterns that are common and predictable to phones.
% Conversely, uses that may be frequent on other systems -- such as running CPUs at full speed indefinitely -- do not occur.
% %Accommodating such cases is not necessary.
% %\fixme{Following does not apply to our system}
% %A second aspect of phones also influences our system design.
% While phones must run many types of tasks -- media, games, browsers, financial apps -- they typically only do one at a time.
% Only one app is visible and potentially interactive.
% This greatly reduces the need to resolve priority and resource conflicts amongst apps.
% %Our system thus focuses on the needs and demands of this foreground app, and avoids distractions that other information may induce.
% %\fixme{Change policy to ignore performance (not stall) hints from non-interactive apps?}

% %On phones, system priorities fall into 2 broad categories.
% %When the user is interacting with the phone and is also waiting on response from the phone -- immediately run the CPU at maximum speed.
% %Otherwise, run the CPU at a speed that conserves energy while preserving user experience.
% %For both cases, let the idle subsystem turn off any unneeded CPUs.

% %The reason the general case should focus on energy and not performance stems from the nature of the platform.
% %Phone apps, being interactive in nature, spend the bulk of their time blocking on user input, waiting for button presses or screen swipes.
% %Most of the time, the user is \textit{not} waiting for the phone to become ready for further use.
% %The main product of phones is screendraws, whether scrolling a list or animating a graphic.
% %The computation for these updates takes place ahead of time in background tasks.
% %As long as the CPU speed is fast enough to meet these updates, any additional speed only wastes energy.
% %Our system leverages this situation by picking a CPU speed that saves energy while maintaining user experience.

% %On phones, occasions when additional CPU speed is needed -- when the user is actually waiting on the phone to become responsive -- tend to be both relatively uncommon and readily identifiable.
% %Relying on information future app priorities, whether energy or performance, workably addresses this need.
% %It also yields better latency when it is needed than the default approach.


% \subsection{Introducing \systemname}

% We design a system, \systemname, to run the general case at a speed that saves energy at acceptable cost compared to the default system policy.
% We leverage hints from userspace to accomodate outlier times where a different setting may be warranted.
% Particularly, our system runs performance-desireable periods with better latency than the default.

% \systemname runs the general case at a speed setting that conserves energy.
% We previously observed in Section \ref{complexity_cost} that, for a given workload, energy can be minimized by running the CPU at some midspeed.
% The exact energy-optimal speed is device- and CPU-specific, as even the big and little core clusters of phones will have slightly different optima.
% However, we have found that the variance is close enough that a simple single-speed setting, $\fenergy$, suffices.

% Previous studies have acknowledged the policy goal in adjusting CPU speed on phones should be to minimize energy usage, subject to meeting performance targets.\cite{rao2017application}
% Non-phone studies have suggested the goal in such cases should be to meet deadlines rather than merely to minimize compuation latency.\cite{korkmaz2018workload}
% Given the useful output of phones is largely screen interation, we adopt screendraws as our metric.
% We make the key observation, which we evaluate in Section \ref{sec:evaluation}, that $\fenergy$ speed not only saves energy but is \textit{also fast enough} to run representative interactive apps at acceptable performance.
% It does so by avoiding the pitfalls of picking a speed that is too low -- sacrificing performance, energy or both -- or too high, wasting energy.
% %This design, intuitive for for non-interactive periods, also proves suitable for running typical interactive apps.
% In the later case, the problem with the default governor is that it runs the CPU faster than necessary to meet deadlines, resulting in overperformance.

% We design \systemname to receive hints from userspace when exceptions to the general case should occur.
% Particularly, periods when performance should be prioritized run at the maximum CPU speed $\fperf$.
% Such instances, when the user is waiting on the phone, include app coldstarts and app installs.
% Note that such periods are not synonymous with CPU-bound workloads.
% Indeed, the intermittency of the CPU load during these periods in practice is precisely what makes their runtime even worse in the default case by triggering the default \schedutil policy to ramp down the CPU speed.
% Rather, we leave the CPU at $\fperf$ when there is work, and let the idle policy shut off the CPU when not.
% Our governor takes its guidance from what the system priority should be, rather than how much computational work there is.
% \fixme{\memory hints}

% %For non-interactive periods, when the screen is off, there is virtually no reason to run the device at anything but an energy-optimal setting.


% \subsection{Decision Logic}
% \label{sub:decision_logic}

% Algorithm \ref{alg:cpu_speed_selection} depicts the speed selection logic of \systemname.
% Our basic design is to set the CPU speed to $\fenergy$ for general case, unless overridden.
% In all cases, the existing Linux idle policy will disable the CPU if the corresponding runqueue is empty -- that is, if the individual CPU has no work.
% Otherwise, when there is work to do, the CPU will run in a C-state at a speed selected by \systemname.
% The governor first considers whether there is any current \memory hint, from any app.
% If so, it sets the CPU cluster speed to \fmemory.
% \fixme{justify this -- microbench}
% This is because, regardless of the interactive state of the phone or of any other hints received, a \memory workload will generate ongoing CPU stalls.
% Running the CPU any faster than \fmemory will waste energy at negligible benefit.

% Otherwise, the governor considers whether the phone is currently interactive -- whether the screen is on.
% \fixme{or audio streaming?}
% If so, it sets the CPU speed to $\fenergy$:  Absent interactivity, there is no reasonable priority besides conserving energy.
% Any request otherwise from an app is therefore discarded.
% Thirdly, the governor checks whether there is any current performance hint request, from any app, and if so immediately sets the CPU speed to $\fperf$.
% Since the device is interactive, the phone should immediately prioritize latency and not wait for any rampup.
% Nor should it consider any intermittency in the workload -- say, an app blocking on I/O -- as does the \schedutil default policy.

% Lastly, as a default the governor sets the CPU to $\fenergy$.
% In this case, the policy knows the phone is interactive, so it also monitors screendraws.
% \fixme{not implemented -- offer as an option?}
% If the frame drop (jank) rate hits \fixme{what}, the governor adjusts the speed upward slightly, bounded by \fixme{what}.
% This avoids the overperformance triggered by the default policy while maintaining user experience.
% In practice, we have observed that keeping the CPU at $\fenergy$ is sufficient.

% We do not address different simultaneous intra-cluster CPU speeds:  On our devices, like most current phones \fixme{verify this}, the speeds of the 4 big and 4 little core clusters must be set as a block.
% For stability and security, we implement a time-out of 10s -- we have observed these periods are typically much less than this in practice, and userspace can always re-supply the hint.

% %Hints from userspace generally involve an upcoming performance-prioritized period where the CPUs should be set to 100\%.
% %We also support a userspace hint for pending \memory workload, where the CPU should be set to a lower speed of 40\%.
% %Userspace can also indicate that performance-priority or \memory optimized periods have ended, and that default behavior should resume.

% %When \systemname receives conflicting hints from different apps, it prioritizes both performance-optimized and \memory hints over default behavior (energy-centric) hints.
% %\fixme{NO FIX THIS -- set to slowest of received hints}
% %We reserve resolution between performance-prioritized (speed 100) and \memory (speed 40) hints to future work.

% %\fixme{OR ignore hint if screenoff / non-interactive}

%We start with the default \schedutil governor, and iteratively adapt it based on our claims from the preceding sections.
The Android AOSP 10 platform forms the base of our system.
We modded the Linux 4.4.210 kernel with the \systemname governor to implement the design logic.
This is based on a fork of the \schedutil governor.
We rely on the existing system idle policy to put the CPU in an idle state whenever the runqueue becomes empty.
The Linux CFS scheduler, as before, periodically calls into the governor to set the CPU cluster speed.
The \systemname governor picks a new CPU (cluster) speed.% as described in \Cref{alg:fullKiss} above.
%The default policy sets the speed based upon recent utilization.
%Instead, we set the speed to $\fenergy$ in the general case.
% A syscall API, with native calldown support from the Android platform, allows userspace to communicate hints about pending system needs and to request a new default CPU speed setting.
% This suggestion can be either a new fixed speed or a bounded range that allows the calculated \schedutil speed to float within a requested range.
% The kernel then uses this information, along with the decision logic of Section \ref{sub:decision_logic}, to set the actual speed of the CPU cluster.

In-kernel structures maintain the status of currently active \memory and performance hints from userspace.
Hints can be set and cleared on a per-task basis.
The governor enforces a per-task timeout of 10s for each requested hint, implemented as a kernel \texttt{delayed\_work} task that asynchronously clears the task's hint if it is still current.
Tasks may re-request a hint, regardless of whether an existing hint has been cleared manually or timed out.
The former case resets the timeout clock to 10s.
The governor ignores requests to clear hints from tasks that have not requested any, leaving intact any hints from other tasks.

% We modded the AOSP framework to expose the new syscall interface to userlevel Java code.
% \fixme{todo}
% The later can supply appropriate hints for common observed usecases, app coldstarts and app installs.
% The hint API is also directly available for apps if they know their upcoming behavior warrants it -- likely, only some high-compute games or miners may need this.
% Additionally, we modded the platform to load kernel module that the build process regenerated due to our changes to the kernel source -- the default build system does not incorporate such changes, and a modded kernel will refuse to load the unrebuilt modules.
% Finally, we added a subset of the closed-source Google Apps to the AOSP build to allow downloading and installing of arbitrary apps from the Google Play Store.


% \subsection{API}

% \tinysection{Existing}

% The Linux maintainers have recognized the need for better performance, and introduced an API for userspace processes to request a boost (or reduction) to the CPU speed otherwise calculated by the schedutil governor.
% \fixme{cut and pasted above}
% They have implemented a virtual filesystem, mounted at \texttt{/dev/stune/}, with relevant hooks:
% \begin{itemize}
% \item[]{\texttt{schedtune.boost}}
% \item[]{\texttt{schedtune.prefer\_idle}}
% \item[]{\texttt{schedtune.tasks}}
% \item[]{\texttt{schedtune.cgroup\_procs}}
% \end{itemize}
% The \texttt{schedtune.boost} pseudofile is a performance boost parameter that adjusts the apparent usage of a given task as presented by the scheduler to the \schedutil governor.
% With a 50\% boost value, for example, a task that originally had 60\% load value of maximum would be inflated to report 80\% to the governor.
% A 100\% boost value would thus eventually result in the CPU being set to maximum frequency.
% The \texttt{schedtune.prefer\_idle}, when set, suggests the task be run on a currently idle CPU if possible.
% The stune framework allows creation of different peformance boost groups for tasks and groups.
% These can be popluated using the \texttt{schedtune.tasks} and \texttt{schedtune.cgroup\_procs}.

% \tinysection{Ours}

% The governor itself is a fork of the existing \schedutil policy.
% Switch the kernel to use our \systemname policy through \texttt{sysfs}:
% \begin{itemize}
% \item[]{\texttt{\$ echo "kiss" > /sys/devices/system/cpu/cpufreq/policy0}}
% \item[]{\texttt{\$ echo "kiss" > /sys/devices/system/cpu/cpufreq/policy4}}
% \end{itemize}
% Note policies must be set individually for both CPU clusters.

% To request or remove a performance boost hint to the kernel, use the syscall:
% \begin{itemize}
% \item[]{\texttt{int syscall(SYS\_prio\_hint, int hint);}}
% \end{itemize}
% On our current system, \texttt{SYS\_prio\_hint} is the constant 285.
% Supported hint values are 70 or 100.
% A value of 100 will set the CPU frequency to maximum (100\%).
% A value of 70 will return the speed to the default (not necessarily 70\%).

% To change the default frequency of the cluster:
% \begin{itemize}
% \item[]{\texttt{int syscall(SYS\_cpufreq\_def, unsigned int, lmin, unsigned int, lmax, unsigned int, bmin, unsigned int, bmax)}}
% \end{itemize}

% \texttt{SYS\_cpufreq\_def} is the constant 286.
% The values \texttt{lmin, lmax, bmin,} and \texttt{bmax} should be implemented CPU frequencies, and represent frequency bounds, either low or high, of the little and big CPU clusters.
% If a parameter value is negative, it will be ignored and the existing default value reused.