-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathprisc.tex
397 lines (340 loc) · 20.8 KB
/
prisc.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
%% For double-blind review submission, w/o CCS and ACM Reference (max submission space)
%\documentclass[sigplan,review,anonymous]{acmart}\settopmatter{printfolios=true,printccs=false,printacmref=false}
%% For double-blind review submission, w/ CCS and ACM Reference
%\documentclass[sigplan,review,anonymous]{acmart}\settopmatter{printfolios=true}
%% For single-blind review submission, w/o CCS and ACM Reference (max submission space)
%\documentclass[sigplan,review]{acmart}\settopmatter{printfolios=true,printccs=false,printacmref=false}
%% For single-blind review submission, w/ CCS and ACM Reference
%\documentclass[sigplan,review]{acmart}\settopmatter{printfolios=true}
%% For final camera-ready submission, w/ required CCS and ACM Reference
\documentclass[sigplan,dvipsnames]{acmart}\settopmatter{}
%% "not anonymous"
\settopmatter{printacmref=false} % Removes citation information below abstract
\renewcommand\footnotetextcopyrightpermission[1]{}
%% Conference information
%% Supplied to authors by publisher for camera-ready submission;
%% use defaults for review submission.
\acmConference[PriSC'22]{ACM SIGPLAN Workshop on Principles of Secure Compilation}{January 22nd, 2022}{Philadelphia, PA, USA}
\acmYear{2022}
%\acmISBN{} % \acmISBN{978-x-xxxx-xxxx-x/YY/MM}
%\acmDOI{} % \acmDOI{10.1145/nnnnnnn.nnnnnnn}
%\startPage{1}
%% Copyright information
%% Supplied to authors (based on authors' rights management selection;
%% see authors.acm.org) by publisher for camera-ready submission;
%% use 'none' for review submission.
\setcopyright{none}
%\setcopyright{acmcopyright}
%\setcopyright{acmlicensed}
%\setcopyright{rightsretained}
%\copyrightyear{2022} %% If different from \acmYear
%% Bibliography style
\bibliographystyle{ACM-Reference-Format}
%% Citation style
%\citestyle{acmauthoryear} %% For author/year citations
%\citestyle{acmnumeric} %% For numeric citations
%\setcitestyle{nosort} %% With 'acmnumeric', to disable automatic
%% sorting of references within a single citation;
%% e.g., \cite{Smith99,Carpenter05,Baker12}
%% rendered as [14,5,2] rather than [2,5,14].
%\setcitesyle{nocompress} %% With 'acmnumeric', to disable automatic
%% compression of sequential references within a
%% single citation;
%% e.g., \cite{Baker12,Baker14,Baker16}
%% rendered as [2,3,4] rather than [2-4].
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Note: Authors migrating a paper from traditional SIGPLAN
%% proceedings format to PACMPL format must update the
%% '\documentclass' and topmatter commands above; see
%% 'acmart-pacmpl-template.tex'.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand\hmmax{0}
\newcommand\bmmax{0}
%% Some recommended packages.
\usepackage[colorinlistoftodos]{todonotes}
\newcommand{\MK}[1]{\todo[color=orange!30]{TODO: #1}}
\newcommand{\MP}[1]{\todo[color=blue!30]{TODO: #1}}
\usepackage{booktabs} %% For formal tables:
%% http://ctan.org/pkg/booktabs
\usepackage{subcaption} %% For complex figures with subfigures/subcaptions
%% http://ctan.org/pkg/subcaption
\usepackage{stmaryrd}
\usepackage{mathrsfs}
\usepackage{mathtools}
\usepackage{xcolor}
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{glossaries}
\usepackage{bm}
\usepackage{cleveref}
\usepackage{listings}
\usepackage{./../mmmacros}
\usepackage[switch]{lineno}
%\modulolinenumbers[2]
\renewcommand{\linenumberfont}{\normalfont\bfseries\small\color{red}}
\newacronym{rc}{RC}{Robust Compilation}
\begin{document}
%% Title information
\title{Composing Secure Compilers} %% [Short Title] is optional;
%% when present, will be used in
%% header instead of Full Title.
%\titlenote{with title note} %% \titlenote is optional;
%% can be repeated if necessary;
%% contents suppressed with 'anonymous'
%\subtitle{Subtitle} %% \subtitle is optional
%\subtitlenote{with subtitle note} %% \subtitlenote is optional;
%% can be repeated if necessary;
%% contents suppressed with 'anonymous'
%% Author information
%% Contents and number of authors suppressed with 'anonymous'.
%% Each author should be introduced by \author, followed by
%% \authornote (optional), \orcid (optional), \affiliation, and
%% \email.
%% An author may have multiple affiliations and/or emails; repeat the
%% appropriate command.
%% Many elements are not rendered, but should be provided for metadata
%% extraction tools.
%% Author with single affiliation.
\author{Matthis Kruse}
%\authornote{with author1 note} %% \authornote is optional;
%% can be repeated if necessary
\orcid{0000-0003-4062-9666} %% \orcid is optional
\affiliation{
% \position{Position1}
% \department{Department1} %% \department is recommended
\institution{CISPA Helmholtz Center for Information Security} %% \institution is required
% \streetaddress{Street1 Address1}
% \city{City1}
% \state{State1}
% \postcode{Post-Code1}
\country{Germany} %% \country is recommended
}
\email{[email protected]} %% \email is recommended
%% Author with two affiliations and emails.
\author{Marco Patrignani}
%\authornote{with author2 note} %% \authornote is optional;
%% can be repeated if necessary
\orcid{0000-0003-3411-9678} %% \orcid is optional
\affiliation{
% \position{Position2a}
% \department{Department2a} %% \department is recommended
\institution{CISPA Helmholtz Center for Information Security} %% \institution is required
% \streetaddress{Street2a Address2a}
% \city{City2a}
% \state{State2a}
% \postcode{Post-Code2a}
\country{Germany} %% \country is recommended
}
\email{[email protected]} %% \email is recommended
%% Abstract
%% Note: \begin{abstract}...\end{abstract} environment must come
%% before \maketitle command
%\begin{abstract}
%Text of abstract \ldots.
%\end{abstract}
%% 2012 ACM Computing Classification System (CSS) concepts
%% Generate at 'http://dl.acm.org/ccs/ccs.cfm'.
%\begin{CCSXML}
%<ccs2012>
%<concept>
%<concept_id>10011007.10011006.10011008</concept_id>
%<concept_desc>Software and its engineering~General programming languages</concept_desc>
%<concept_significance>500</concept_significance>
%</concept>
%<concept>
%<concept_id>10003456.10003457.10003521.10003525</concept_id>
%<concept_desc>Social and professional topics~History of programming languages</concept_desc>
%<concept_significance>300</concept_significance>
%</concept>
%</ccs2012>
%\end{CCSXML}
%\ccsdesc[500]{Software and its engineering~General programming languages}
%\ccsdesc[300]{Social and professional topics~History of programming languages}
%% End of generated code
%% Keywords
%% comma separated list
%\keywords{compilers, security} %% \keywords are mandatory in final camera-ready submission
%% \maketitle
%% Note: \maketitle command must come after title commands, author
%% commands, abstract environment, Computing Classification System
%% environment and commands, and keywords command.
\maketitle
\linenumbers
\section{Introduction}
% a recent theory of secure compilation is robust ...
Compilers translate programs from a source to a target programming language.
A secure compiler preserves source level properties at the target level when interoperating with arbitrary program contexts (which are considered attackers).
% Different such criteria exist.
A recent theory of secure compilation is \gls{rc}, which is a collection of criteria for secure compilers~\cite{abate19,abate20,patrignani21}.
Informally, a compiler is \gls{rc} if a source program and its compiled counterpart, linked with an arbitrary source and target context respectively, satisfy that property.
% Hereby, the target program is the compiled version of the source program.
Even though there exist robust compilers, they are far from practical.
Real-world compilers consist of several smaller compilers that are composed with each other in different ways.
An example would be any compiler based on the LLVM toolchain~\cite{lattner04}, whose optimisation pipeline consists of many passes, which one can view as independent compilers composed with each others.
Also, any lowering steps, such as from a frontend language to LLVM IR and subsequently to assembly, are compilers.
To the best of our knowledge, current work on robust compilation does not discuss the preservation of source-level properties for compilers such as the ones above.
This paper investigates how different compiler compositions preserve different classes of hyperproperties, given that these compilers attain some form of \gls{rc}.
% To reason about hyperproperty preservation, we look at classes of hyperproperties, which we call classes.
% This is an idea taken from~\citet{abate19} but made explicit.
We examine whether these compositions preserve at least the set intersection of classes.
We then show that the order of optimisations in a \gls{rc} pipeline does not matter for property preservation.
Finally, we conclude with a discussion on what happens if some compilers in the pipeline do not attain \gls{rc} for some classes of interest.
\section{Compositionality}
% to reason about compilers, we need to ....
In this work, programs $\com{p}$ are elements of $\partials$, the set of partial programs of a given programming language.
A compiler is a partial function $\stcomp{\bullet}$ from programs $\src{p}$ of some source language $\S$ to programs $\trg{p}$ of some target language $\T$.
Compilers satisfying \Cref{def:rtp} below attain \gls{rc}~\cite{abate19}, the intuition there is that if the programmer makes certain assumptions on what a program does, these assumptions also hold for the compiled program.
In that definition, indicate hyperproperties~\cite{clarkson08} with $\Pi$ and classes of hyperproperties (i.e., sets of $\Pi$) as $\cC$.
A program $p$ robustly satisfies class $\cC$ (written $\rsat{p}{\cC}$) if its behaviour is included in an element of $\cC$ when linked with an arbitrary program context.
Similarly, for some $\Pi\in\cC$, we write $\rsat{p}{\Pi}$ whenever $p$ robustly satisfies $\Pi$.
%
\begin{definition}[Robust Compilation]\label{def:rtp}
For a given class $\cC$, a compiler from languages $\S$ to $\T$ robustly preserves $\cC$ ($\rtp{\stcomp{\bullet}}{\cC}$) iff
$$
\forall\Pi\in\cC,\forall\src{p}\in\src{\partials},\rsat{\src{p}}{\Pi}\implies\rsat{\stcomp{\src{p}}}{\Pi}
$$
\end{definition}
% \noindent
%So, given a hyperproperty $\Pi$ from some class $\cC$ and a source program $\src{p}$ that robustly satisfies $\Pi$ ($\rsat{\src{p}}{\Pi}$), the compiled program $\stcomp{\src{p}}$ also needs to robustly satisfy $\Pi$.
In practice, (robust) compilers are composed of numerous others.
Therefore, we now investigate their compositionality.
\subsection{Simple Compositionality}
We first consider function composition, i.e., plugging the result of one compiler into another one.
Such pipelines happen when optimising source code (so, at the level of a suitable intermediate representation), but also on a higher level:
Consider as an example a typical $\src{TypeScript}$ compilation pipeline.
First, the compiler translates $\src{TypeScript}$ code to $\irl{JavaScript}$, which a part of V8 eventually compiles the code just-in-time to $\trg{assembly}$.
%
\begin{definition}[Sequential Composition of Compilers]
Given two compilers $\sicomp{\bullet}$ and $\itcomp{\bullet}$, their sequential composition is $\stcomp{\bullet}=\itcompN{\sicomp{\bullet}}$.
\end{definition}
Assuming that two compilers preserve certain classes, their sequential composition preserves the least upper bound, i.e., the set intersection of those classes:
\begin{lemma}[Sequential Composition with \gls{rc}]\label{lem:seqcompo}
Given $\rtp{\sicomp{\bullet}}{\cC_{1}}$ and $\rtp{\itcomp{\bullet}}{\cC_{2}}$, then $\rtp{\sitcomp{\bullet}}{\cC_{1}\cap\cC_{2}}$.
\end{lemma}
Using an inductive argument, \Cref{lem:seqcompo} generalises to $n$ \gls{rc} compilers, each preserving one of $n$ classes.
To do so, one has to generalise the composition of two \gls{rc} compilers to a set of $n$ ones.
A real-world example for such deeply nested compositions is the $\src{TypeScript}$ compilation mentioned above.
When compiling $\irl{JavaScript}$, V8 translates the code to $\trg{Ignition\ Bytecode}$.
At runtime, the Ignition interpreter does some performance measurements and particular parts of the code are eventually compiled to machine code.
We now consider a compiler that invokes two other compilers.
$\src{Java}$ and $\irl{Kotlin}$ are popular languages used in industry that are one example of such a composition and they both compile to $\trg{JVM\ Bytecode}$.
\begin{definition}[Upper Composition]
Given two compilers $\stcomp{\bullet}$ and $\itcomp{\bullet}$, their upper composition is
\[
\uhcsitcomp{\bullet}=\lambda p.
\begin{cases}
\stcomp{p} &\text{if }p\in\src{\partials}
\\
\itcomp{p} &\text{if }p\in\irl{\partials}
\end{cases}
\]
\end{definition}
We can derive a similar result to \Cref{lem:seqcompo} here, too:
\begin{lemma}[Upper Composition with \gls{rc}]\label{lem:useqcompo}
Given $\rtp{\stcomp{\bullet}}{\cC_{1}}$ and $\rtp{\itcomp{\bullet}}{\cC_{2}}$, then $\rtp{\uhcsitcomp{\bullet}}{\cC_{1}\cap\cC_{2}}$.
\end{lemma}
\Cref{lem:useqcompo} also generalises inductively to a number of compilers and classes.
A practical example of why that might be useful is the Java Virtual Machine with its $\trg{JVM\ Bytecode}$, which has numerous frontends: $\src{Java}$, $\irl{Kotlin}$, $\src{Scala}$, and $\irl{Clojure}$, to list a few examples.
With the same idea, we define a dual composition that goes from a single source language to multiple target languages.
\texttt{dune} is a build system which can be used to compile $\src{OCaml}$ code to both $\irl{assembly}$ and $\trg{Caml\ Bytecode}$.
\begin{definition}[Lower Composition]
Given two compilers $\stcomp{\bullet}$ and $\sicomp{\bullet}$, their lower composition is $\lhcsitcomp{\bullet}$.
\end{definition}
\begin{lemma}[Lower Composition with \gls{rc}]
Given $\rtp{\stcomp{\bullet}}{\cC_{1}}$ and $\rtp{\sicomp{\bullet}}{\cC_{2}}$, then $\rtp{\lhcsitcomp{\bullet}}{\cC_{1}\cap\cC_{2}}$.
\end{lemma}
As before, this can be generalized to an arbitrary number of compilers, which also has a connection to the real-world, given by the diverse set of assembly language dialects.
The following free theorem (\Cref{lem:swappable}) is a direct consequence of \Cref{lem:seqcompo} where the involved compilers' input and output are both partial programs in the same language.
Given that some compiler passes attain \gls{rc}, they can be combined in an arbitrary order and the result preserves the same least upper bound.
A compiler's pipeline ordering is difficult and often hand-tuned.
The lemma allows us to not care about the particular order of optimisations regarding their robust property preservation.
So, the compiler developer is free to swap passes around.
\begin{lemma}[Swappable]\label{lem:swappable}
Given $\rtp{\ttcomp{\bullet}_{(1)}}{\cC_{1}}$ and $\rtp{\ttcomp{\bullet}_{(2)}}{\cC_{2}}$, then $\rtp{\ttcompN{\ttcomp{\bullet}_{(2)}}_{(1)}}{\cC_{1}\cap\cC_{2}}$ and $\rtp{\ttcompN{\ttcomp{\bullet}_{(1)}}_{(2)}}{\cC_{1}\cap\cC_{2}}$.
\end{lemma}
However, in practice, compiler passes are not necessarily attaining \gls{rc}.
Consider any stereotypical compilation pipeline.
Programmers want properties at the source level to be preserved at the target level.
Thus, if source programs robustly satisfy some property, so should their compiled counterparts.
Unfortunately, it might not be necessary for compilation passes from one intermediate representation to the other to preserve properties robustly.
This also has a security justification since compiler intermediate representations are not where typical attackers reside (i.e., the target language).
So, there might be some stronger property a pass has to satisfy in order to render the whole compilation pipeline secure: this is what we study next.
\subsection{Advanced Compositionality}
Consider the following C code snippet that performs an infinite loop if an invalid pointer is given:
\begin{lstlisting}[language=c]
int something(int* ptr) {
while(!ptr);
return *ptr;
}
\end{lstlisting}
Compiling such code with optimisations turned on by using the command \href{https://godbolt.org/z/bnaGnPe36}{\texttt{g++ -O2}} and the \texttt{g++} compiler version 11.2 yields an x86-program where the potentially infinite loop has been removed:
\begin{lstlisting}[language={[x86masm]Assembler}]
something(int*):
mov eax, DWORD PTR [rdi]
ret
\end{lstlisting}
We now have an attack to violate memory safety: call the function with an invalid pointer and the program dereferences it.
To prevent such issues we can use instrumentation passes that \textit{enforce} memory safety by adding dynamic checks to the program and crashing appropriately when a violation is detected.
There exist several memory-safety instrumentations, both for target~\cite{watson15,aurajo16,vassena19,kouwe17,tarditi2018,sammler21} and source languages~\cite{nagarakatte10,akritidis09,rigger17}.
%So, even though the programmer has written a memory-insecure program, we might still be interested in a memory-safe binary.
We now sketch how to extend our work with instrumentations, which enforce specific classes of hyperproperties.
% A sketch of the theory is as follows.
\begin{definition}[Secure Instrumentation for Preserving $\cC$]\label{def:secure-instrumentation}
A secure instrumentation with respect to some class $\cC$ is a pass that enforces hyperproperties described by some other class $\cC'$ without violating $\cC$-satisfying programs.
We denote such a secure instrumentation as: $\sinstr{\stcomp{\bullet}}{\cC'}{\cC}$.
\end{definition}
Using this, we firstly want to inspect a compilation pipeline from memory-safe $\src{Rust}$ to optimised, insecure $\irl{C}$, to memory-safe $\trg{CheckedC}$.
Intuitively, we want to be able to state that this pipeline preserves memory safety, despite the fact that the pass to $\irl{C}$ does not.
% We want to be able to reconstruct memory safety for $\trg{CheckedC}$ despite the fact that $\irl{C}$ did not have it.
\begin{example}[Enforcement may preserve...]
Given classes $\cC_{1}, \cC_{2}$ (resp. no property and memory safety, in our $\src{Rust}$ to $\trg{CheckedC}$ example) and compilers $\sicomp{\bullet}$,$\itcomp{\bullet}$, if:
\begin{itemize}
\item $\rtp{\sicomp{\bullet}}{\cC_{1}}$
\item $\sinstr{\itcomp{\bullet}}{\cC_{2}}{\cC_{1}}$
\end{itemize}
\noindent
Then, $\rtp{\sitcomp{\bullet}}{\cC_{1}\cup\cC_{2}}$.
\end{example}
Dually, running a compiler that does not respect memory-safety after a memory-safety instrumentation nullifies its preservation:
\begin{example}[...but, order matters!]
Given classes $\cC_{1},\cC_{2}$ and compilers $\sicomp{\bullet}$,$\itcomp{\bullet}$, if:
\begin{itemize}
\item $\sinstr{\sicomp{\bullet}}{\cC_{2}}{\cC_{1}}$
\item $\rtp{\itcomp{\bullet}}{\cC_{1}}$
\end{itemize}
\noindent
Then, $\rtp{\sitcomp{\bullet}}{\cC_{1}}$.
\end{example}
%We also intend to study property-free characterisations of the presented material.
%Another area is its specialisation to instances, such as $\cHSafety$, and subsets thereof.
Beyond this general theory, we also intend to study the compositionality aspects of concrete hyperproperties, such as Speculative Non-Interference~\cite{guarnieri19}, memory safety~\cite{azevedodeamorim18,azevedodeamorim15,dhawan15}, and cryptographic constant-time~\cite{barthe18}.
%
% use british english!
%
% uppercase after semicolon
%
%% Acknowledgments
%\begin{acks} %% acks environment is optional
% %% contents suppressed with 'anonymous'
% %% Commands \grantsponsor{<sponsorID>}{<name>}{<url>} and
% %% \grantnum[<url>]{<sponsorID>}{<number>} should be used to
% %% acknowledge financial support and will be used by metadata
% %% extraction tools.
% This material is based upon work supported by the
% \grantsponsor{GS100000001}{National Science
% Foundation}{http://dx.doi.org/10.13039/100000001} under Grant
% No.~\grantnum{GS100000001}{nnnnnnn} and Grant
% No.~\grantnum{GS100000001}{mmmmmmm}. Any opinions, findings, and
% conclusions or recommendations expressed in this material are those
% of the author and do not necessarily reflect the views of the
% National Science Foundation.
%\end{acks}
\newpage
%% Bibliography
\bibliography{library}
% \bibliography{./../library}
%% Appendix
%\appendix
%\section{Appendix}
%Text of appendix \ldots
\end{document}