From f03ef1194079c7e5620e596c7b0e1c8a9c7442ea Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 24 Sep 2023 14:13:14 +0100 Subject: [PATCH] tls: enable support for encryption of memory pages This patch enables encryption of memory pages using AES-XTS. This block cipher mode is more efficient when encrypting consecutive blocks of data (memory pages) and allows the use of hardware acceleration available in modern CPUs. XTS uses two 256-bit AES keys. One key is used to perform block encryption, and the other is used to encrypt a "tweak value". The encrypted tweak is further modified with a Galois polynomial function and XORed with both the plaintext and the ciphertext of each block. This ensures that encrypting multiple blocks with identical data will produce different ciphertext. Suggested-by: Daiki Ueno Signed-off-by: Radostin Stoyanov --- criu/include/restorer.h | 4 + criu/include/tls.h | 6 + criu/mem.c | 29 ++++ criu/page-xfer.c | 55 ++++++-- criu/pagemap.c | 21 +++ criu/pie/restorer.c | 86 +++++++++++- criu/tls.c | 292 +++++++++++++++++++++++++++++++++++++++- images/cipher.proto | 2 + 8 files changed, 481 insertions(+), 14 deletions(-) diff --git a/criu/include/restorer.h b/criu/include/restorer.h index f398d8d8fe..c1bcd89531 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h @@ -238,6 +238,10 @@ struct task_restore_args { */ struct rst_rseq_param libc_rseq; + bool encrypted_pages; + int decryption_pipe_fd_r; + int decryption_pipe_fd_w; + uid_t uid; u32 cap_eff[CR_CAP_SIZE]; } __aligned(64); diff --git a/criu/include/tls.h b/criu/include/tls.h index 31f8e9f584..566d87d55d 100644 --- a/criu/include/tls.h +++ b/criu/include/tls.h @@ -24,6 +24,9 @@ int tls_encrypt_file_data(int fd_in, int fd_out, size_t data_size); int tls_decrypt_file_data(int fd_in, int fd_out, size_t data_size); int tls_encryption_pipe(int output_file_fd, int pipe_read_fd); int tls_decryption_pipe(int intput_file_fd, int pipe_write_fd); +int
tls_block_cipher_encrypt_data(void *ptext, size_t ptext_len); +int tls_block_cipher_decrypt_data(void *ctext, size_t ctext_len); +int tls_vma_io_pipe(int pages_img_fd, int pipe_read_fd, int pipe_write_fd); #else /* CONFIG_GNUTLS */ @@ -42,6 +45,9 @@ int tls_decryption_pipe(int intput_file_fd, int pipe_write_fd); #define tls_decrypt_file_data(fd_in, fd_out, data_size) (-1) #define tls_encryption_pipe(output_file_fd, pipe_read_fd) (-1) #define tls_decryption_pipe(intput_file_fd, pipe_write_fd) (-1) +#define tls_block_cipher_encrypt_data(ptext, ptext_len) (-1) +#define tls_block_cipher_decrypt_data(ctext, ctext_len) (-1) +#define tls_vma_io_pipe(pages_img_fd, pipe_read_fd, pipe_write_fd) (-1) #define write_img_cipher() (0) #endif /* CONFIG_HAS_GNUTLS */ diff --git a/criu/mem.c b/criu/mem.c index 417e0a21de..b05a33dfda 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -31,6 +31,7 @@ #include "prctl.h" #include "compel/infect-util.h" #include "pidfd-store.h" +#include "tls.h" #include "protobuf.h" #include "images/pagemap.pb-c.h" @@ -1386,6 +1387,7 @@ int open_vmas(struct pstree_item *t) static int prepare_vma_ios(struct pstree_item *t, struct task_restore_args *ta) { struct cr_img *pages; + int pipe_fds[2][2]; /* * We optimize the case when rsti(t)->vma_io is empty. 
@@ -1410,6 +1412,33 @@ static int prepare_vma_ios(struct pstree_item *t, struct task_restore_args *ta) return -1; ta->vma_ios_fd = img_raw_fd(pages); + + if (!opts.encrypt) { + ta->encrypted_pages = false; + ta->decryption_pipe_fd_w = -1; + ta->decryption_pipe_fd_r = -1; + } else { + ta->encrypted_pages = true; + + if (pipe(pipe_fds[0])) { + pr_perror("Failed to create pipe"); + return -1; + } + if (pipe(pipe_fds[1])) { + pr_perror("Failed to create pipe"); + return -1; + } + + if (tls_vma_io_pipe(ta->vma_ios_fd, pipe_fds[0][0], pipe_fds[1][1])) { + pr_err("Failed to setup VMA IO pipe\n"); + return -1; + } + close(pipe_fds[0][0]); + close(pipe_fds[1][1]); + ta->decryption_pipe_fd_w = pipe_fds[0][1]; + ta->decryption_pipe_fd_r = pipe_fds[1][0]; + } + return pagemap_render_iovec(&rsti(t)->vma_io, ta); } diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 94f4774148..20c21dc11d 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -257,19 +257,52 @@ static int write_pages_loc(struct page_xfer *xfer, int p, unsigned long len) ssize_t ret; ssize_t curr = 0; - while (1) { - ret = splice(p, NULL, img_raw_fd(xfer->pi), NULL, len - curr, SPLICE_F_MOVE); - if (ret == -1) { - pr_perror("Unable to spice data"); - return -1; + if (opts.encrypt) { + uint8_t buf[PAGE_SIZE]; + + /* We encrypt each page separately to enable decryption + * of arbitrary pages during restore. This is required + * for auto-deduplication and incremental checkpointing. 
+ */ + BUG_ON((len % PAGE_SIZE) != 0); + + for (curr = 0; curr < len; curr += PAGE_SIZE) { + ret = read(p, buf, PAGE_SIZE); + if (ret < 0) { + pr_perror("Unable to read data"); + return -1; + } + if (ret == 0) { + pr_err("A pipe was closed unexpectedly\n"); + return -1; + } + BUG_ON(ret != PAGE_SIZE); + + if (tls_block_cipher_encrypt_data(buf, PAGE_SIZE)) { + pr_err("Failed to encrypt data\n"); + return -1; + } + ret = write(img_raw_fd(xfer->pi), buf, PAGE_SIZE); + if (ret != PAGE_SIZE) { + pr_perror("Unable to write data %zd", ret); + return -1; + } } - if (ret == 0) { - pr_err("A pipe was closed unexpectedly\n"); - return -1; + } else { + while (1) { + ret = splice(p, NULL, img_raw_fd(xfer->pi), NULL, len - curr, SPLICE_F_MOVE); + if (ret == -1) { + pr_perror("Unable to spice data"); + return -1; + } + if (ret == 0) { + pr_err("A pipe was closed unexpectedly\n"); + return -1; + } + curr += ret; + if (curr == len) + break; } - curr += ret; - if (curr == len) - break; } return 0; diff --git a/criu/pagemap.c b/criu/pagemap.c index 83f69bba37..f1a5ce93de 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ -13,6 +13,7 @@ #include "restorer.h" #include "rst-malloc.h" #include "page-xfer.h" +#include "tls.h" #include "fault-injection.h" #include "xmalloc.h" @@ -261,6 +262,15 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr, unsigned l break; } + if (opts.encrypt) { + for (int i = 0; i < len; i += PAGE_SIZE) { + if (tls_block_cipher_decrypt_data(buf + i, PAGE_SIZE)) { + pr_err("Failed to decrypt data\n"); + return -1; + } + } + } + if (opts.auto_dedup) { ret = punch_hole(pr, pr->pi_off, len, false); if (ret == -1) @@ -559,6 +569,17 @@ static int process_async_reads(struct page_read *pr) return -1; } + if (opts.encrypt) { + for (int i = 0; i < piov->nr; i++) { + for (int j = 0; j < piov->to[i].iov_len; j += PAGE_SIZE) { + if (tls_block_cipher_decrypt_data(piov->to[i].iov_base + j, PAGE_SIZE)) { + pr_err("Failed to decrypt data\n"); + return -1; + } +
} + } + } + if (opts.auto_dedup && punch_hole(pr, piov->from, ret, false)) return -1; diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 02971657ef..fddf6b22a2 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1517,6 +1517,75 @@ static ssize_t preadv_limited(int fd, struct iovec *iovs, int nr, off_t offs, si return ret; } +/** + * Similar to preadv_limited(), but uses pipes to communicate with + * a process running in parallel that decrypts the data as the gnutls + * library cannot be used in restorer. + */ +static ssize_t decrypt_preadv_limited(int rfd, int wfd, struct iovec *iovs, int nr, off_t offs, size_t max_to_read) +{ + size_t saved_last_iov_len = 0; + ssize_t ret; + ssize_t preadv_ret; + pid_t local_pid = sys_getpid(); + + if (max_to_read) { + for (int i = 0; i < nr; ++i) { + if (iovs[i].iov_len <= max_to_read) { + max_to_read -= iovs[i].iov_len; + continue; + } + + if (!max_to_read) { + nr = i; + break; + } + + saved_last_iov_len = iovs[i].iov_len; + iovs[i].iov_len = max_to_read; + nr = i + 1; + break; + } + } + + ret = sys_write(wfd, &local_pid, sizeof(pid_t)); + if (ret < 0) { + return -1; + } + + ret = sys_write(wfd, &offs, sizeof(off_t)); + if (ret < 0) { + return -1; + } + + ret = sys_write(wfd, &nr, sizeof(int)); + if (ret < 0) { + return -1; + } + + for (int i = 0; i < nr; i++) { + ret = sys_write(wfd, &iovs[i].iov_len, sizeof(size_t)); + if (ret < 0) { + return -1; + } + + ret = sys_write(wfd, &iovs[i].iov_base, sizeof(void *)); + if (ret < 0) { + return -1; + } + } + + ret = sys_read(rfd, &preadv_ret, sizeof(ssize_t)); + if (ret != sizeof(ssize_t)) { + return -1; + } + + if (saved_last_iov_len) + iovs[nr - 1].iov_len = saved_last_iov_len; + + return preadv_ret; +} + /* * In the worst case buf size should be: * sizeof(struct inotify_event) * 2 + PATH_MAX @@ -1792,8 +1861,15 @@ __visible long __export_restore_task(struct task_restore_args *args) * If we're requested to punch holes in the file after reading we do * it to save memory.
Limit the reads then to an arbitrary block size. */ - r = preadv_limited(args->vma_ios_fd, iovs, nr, rio->off, - args->auto_dedup ? AUTO_DEDUP_OVERHEAD_BYTES : 0); + if (args->encrypted_pages) { + r = decrypt_preadv_limited(args->decryption_pipe_fd_r, args->decryption_pipe_fd_w, iovs, + nr, rio->off, + args->auto_dedup ? AUTO_DEDUP_OVERHEAD_BYTES : 0); + } else { + r = preadv_limited(args->vma_ios_fd, iovs, nr, rio->off, + args->auto_dedup ? AUTO_DEDUP_OVERHEAD_BYTES : 0); + } + if (r < 0) { pr_err("Can't read pages data (%d)\n", (int)r); goto core_restore_end; @@ -1829,6 +1905,12 @@ __visible long __export_restore_task(struct task_restore_args *args) rio = ((void *)rio) + RIO_SIZE(rio->nr_iovs); } + if (args->encrypted_pages) { + pr_debug("Closing decryption pipe\n"); + sys_close(args->decryption_pipe_fd_r); + sys_close(args->decryption_pipe_fd_w); + } + if (args->vma_ios_fd != -1) sys_close(args->vma_ios_fd); diff --git a/criu/tls.c b/criu/tls.c index 751dabee35..0f7d9b6cfa 100644 --- a/criu/tls.c +++ b/criu/tls.c @@ -17,6 +17,7 @@ #include "protobuf.h" #include "cr_options.h" #include "xmalloc.h" +#include "tls.h" /* Compatibility with GnuTLS version < 3.5 */ #ifndef GNUTLS_E_CERTIFICATE_VERIFICATION_ERROR @@ -46,6 +47,19 @@ static int tls_sk_flags = 0; static uint8_t token[32]; static const gnutls_cipher_algorithm_t stream_cipher_algorithm = GNUTLS_CIPHER_CHACHA20_POLY1305; +/* AES-XTS is used for encryption of image files. + * XTS uses two 256-bits AES keys - one key is used to perform + * the AES block encryption; the other is used to encrypt what is + * known as a "tweak value." This encrypted tweak is further modified + * with a Galois polynomial function (GF) and XOR with both the plain + * text and the cipher text of each block. This ensures that blocks of + * identical data will not produce identical cipher text. 
+ */ +static gnutls_cipher_hd_t block_cipher_handle; +static gnutls_datum_t aes_xts_key, aes_xts_iv; +static unsigned int cipher_block_size; +static const gnutls_cipher_algorithm_t block_cipher_algorithm = GNUTLS_CIPHER_AES_256_XTS; + void tls_terminate_session(bool async) { int ret; @@ -423,6 +437,29 @@ static inline int _tls_generate_token(void) return gnutls_rnd(GNUTLS_RND_KEY, &token, sizeof(token)); } +static inline int _aes_xts_generate_key(void) +{ + aes_xts_key.size = gnutls_cipher_get_key_size(block_cipher_algorithm); + aes_xts_key.data = xmalloc(aes_xts_key.size); + pr_debug("Generating encryption key (%u bytes)\n", aes_xts_key.size); + return aes_xts_key.data ? gnutls_rnd(GNUTLS_RND_KEY, aes_xts_key.data, aes_xts_key.size) : GNUTLS_E_MEMORY_ERROR; +} + +static inline int _aes_xts_generate_iv(void) +{ + aes_xts_iv.size = gnutls_cipher_get_iv_size(block_cipher_algorithm); + aes_xts_iv.data = xmalloc(aes_xts_iv.size); + pr_debug("Generating encryption IV (%u bytes)\n", aes_xts_iv.size); + return aes_xts_iv.data ? gnutls_rnd(GNUTLS_RND_NONCE, aes_xts_iv.data, aes_xts_iv.size) : GNUTLS_E_MEMORY_ERROR; +} + +static int _aes_xts_cipher_init(void) +{ + pr_debug("Initializing %s cipher\n", gnutls_cipher_get_name(block_cipher_algorithm)); + cipher_block_size = gnutls_cipher_get_block_size(block_cipher_algorithm); + return gnutls_cipher_init(&block_cipher_handle, block_cipher_algorithm, &aes_xts_key, &aes_xts_iv); +} + /** * tls_initialize_cipher initializes GnuTLS, loads a public key, * and initializes a cipher context that is used to encrypt the @@ -478,6 +515,24 @@ int tls_initialize_cipher(void) return -1; } + ret = _aes_xts_generate_key(); + if (ret < 0) { + tls_perror("Failed to generate key", ret); + return -1; + } + + ret = _aes_xts_generate_iv(); + if (ret < 0) { + tls_perror("Failed to generate iv", ret); + return -1; + } + + ret = _aes_xts_cipher_init(); + if (ret < 0) { + tls_perror("Failed to initialize cipher", ret); + return -1; + } + gnutls_free(cert_data.data); gnutls_x509_crt_deinit(crt); @@ -620,33 +675,39 @@ int
tls_initialize_cipher_from_image(void) if (!x509_key) return -1; + // Initialize private key object ret = gnutls_privkey_init(&privkey); if (ret < 0) { tls_perror("Failed to initialize private key", ret); return -1; } + // Import private key ret = gnutls_privkey_import_x509(privkey, x509_key, 0); if (ret < 0) { tls_perror("Failed to import private key", ret); return -1; } + // Load entry from cipher image ret = pb_read_one(img, &ce, PB_CIPHER); if (ret < 0) { pr_err("Failed to read cipher entry\n"); goto out_close; } + pr_debug("Loading ChaCha20-Poly1305 key from cipher image\n"); + + // Decrypt token ciphertext.data = ce->token.data; ciphertext.size = ce->token.len; - ret = gnutls_privkey_decrypt_data(privkey, 0, &ciphertext, &decrypted_token); if (ret < 0) { tls_perror("Failed to decrypt token data", ret); goto out_close; } + // Validate token size if (decrypted_token.size != sizeof(token)) { pr_err("Invalid token size (%d != %lu)\n", decrypted_token.size, sizeof(token)); goto out_close; @@ -657,6 +718,43 @@ int tls_initialize_cipher_from_image(void) goto out_close; } + pr_debug("Loading AES key from cipher image\n"); + + // Decrypt AES key + ciphertext.data = ce->aes_key.data; + ciphertext.size = ce->aes_key.len; + ret = gnutls_privkey_decrypt_data(privkey, 0, &ciphertext, &decrypted_token); + if (ret < 0) { + tls_perror("Failed to decrypt key data", ret); + goto out_close; + } + + // Validate AES key size + aes_xts_key.size = gnutls_cipher_get_key_size(block_cipher_algorithm); + if (decrypted_token.size != aes_xts_key.size) { + pr_err("Invalid key size (%d != %u)\n", decrypted_token.size, aes_xts_key.size); + goto out_close; + } + aes_xts_key.data = xmalloc(aes_xts_key.size); + memcpy(aes_xts_key.data, decrypted_token.data, decrypted_token.size); + + pr_debug("Loading IV from cipher image\n"); + + aes_xts_iv.size = gnutls_cipher_get_iv_size(block_cipher_algorithm); + if (ce->aes_iv.len != aes_xts_iv.size) { + pr_err("Invalid IV size (%lu != %u)\n", 
ce->aes_iv.len, aes_xts_iv.size); + goto out_close; + } + aes_xts_iv.data = xmalloc(aes_xts_iv.size); + memcpy(aes_xts_iv.data, ce->aes_iv.data, aes_xts_iv.size); + + // Initialize AES-XTS cipher context + ret = _aes_xts_cipher_init(); + if (ret < 0) { + tls_perror("Failed to initialize cipher", ret); + goto out_close; + } + ret = 0; out_close: close_image(img); @@ -730,6 +828,20 @@ int write_img_cipher(void) ce.token.len = ciphertext.size; ce.token.data = ciphertext.data; + plaintext.data = aes_xts_key.data; + plaintext.size = aes_xts_key.size; + ret = _encrypt_data_with_pubkey(&plaintext, &ciphertext); + if (ret < 0) { + return -1; + } + ce.has_aes_key = true; + ce.aes_key.len = ciphertext.size; + ce.aes_key.data = ciphertext.data; + + ce.has_aes_iv = true; + ce.aes_iv.len = aes_xts_iv.size; + ce.aes_iv.data = aes_xts_iv.data; + pr_debug("Writing cipher image\n"); img = open_image(CR_FD_CIPHER, O_DUMP); if (!img) @@ -1220,3 +1332,181 @@ int tls_decryption_pipe(int intput_file_fd, int pipe_write_fd) } exit(0); } + +int tls_block_cipher_encrypt_data(void *ptext, size_t ptext_len) +{ + int ret; + + ret = gnutls_cipher_encrypt2(block_cipher_handle, ptext, ptext_len, (void *)ptext, ptext_len); + if (ret < 0) { + tls_perror("Failed to encrypt data", ret); + return -1; + } + return 0; +} + +int tls_block_cipher_decrypt_data(void *ctext, size_t ctext_len) +{ + int ret; + + ret = gnutls_cipher_decrypt2(block_cipher_handle, ctext, ctext_len, (void *)ctext, ctext_len); + if (ret < 0) { + tls_perror("Failed to decrypt data", ret); + return -1; + } + return 0; +} + +/** + * tls_vma_io_pipe forks a child process that reads encrypted data from + * the pages_img_fd and decrypts the data. It uses process_vm_writev() + * to write the decrypted data to the address space of remote process. + * The pipe_read_fd and pipe_write_fd are used to communicate with the + * restorer process (see decrypt_preadv_limited() in criu/pie/restorer.c).
+ */
+int tls_vma_io_pipe(int pages_img_fd, int pipe_read_fd, int pipe_write_fd)
+{
+	pid_t child_pid;
+	int ret, status;
+
+	child_pid = fork();
+	if (child_pid == -1) {
+		pr_perror("Failed to fork");
+		return -1;
+	}
+
+	if (child_pid > 0) {
+		if (waitpid(child_pid, &status, 0) == -1) {
+			pr_perror("waitpid() failed");
+			return -1;
+		}
+		return (WIFEXITED(status) && WEXITSTATUS(status) == 0) ? 0 : -1; // non-zero: double fork failed
+	}
+
+	child_pid = fork();
+	if (child_pid == -1) {
+		pr_perror("Failed to double fork");
+		exit(1);
+	}
+
+	if (child_pid > 0) {
+		exit(0);
+	}
+
+	child_pid = getpid();
+
+	while (1) {
+		int nr;
+		off_t offs;
+		pid_t remote_pid;
+		struct iovec *local_iovs, *remote_iovs;
+		size_t iov_len, total_len = 0;
+		ssize_t preadv_ret;
+
+		// Read remote PID from pipe. This is the PID value used
+		// by process_vm_writev() to identify the remote process
+		ret = read(pipe_read_fd, &remote_pid, sizeof(pid_t));
+		if (ret < 0) {
+			pr_perror("Failed reading remote PID");
+			exit(1);
+		}
+		if (ret == 0) {
+			// End of input
+			break;
+		}
+
+		// Read offs and nr from pipe. These are the offset and
+		// number of iovecs used by preadv() to read data from
+		// the pages image. The data is then decrypted and written
+		// to the remote process using process_vm_writev().
+		ret = read(pipe_read_fd, &offs, sizeof(off_t));
+		if (ret != sizeof(off_t)) { // EOF or a short read mid-request is an error too
+			pr_perror("Failed reading offs");
+			exit(1);
+		}
+
+		ret = read(pipe_read_fd, &nr, sizeof(int));
+		if (ret != sizeof(int)) {
+			pr_perror("Failed reading nr");
+			exit(1);
+		}
+
+		// local_iovs are used to read encrypted data from pages image
+		// remote_iovs are used to write decrypted data to remote process
+		// See decrypt_preadv_limited() in criu/pie/restorer.c and man page
+		// for process_vm_writev(2).
+		local_iovs = xmalloc(nr * sizeof(struct iovec));
+		remote_iovs = xmalloc(nr * sizeof(struct iovec));
+
+		for (int i = 0; i < nr; i++) {
+			ret = read(pipe_read_fd, &iov_len, sizeof(size_t));
+			if (ret != sizeof(size_t)) {
+				pr_perror("Failed reading iov_len");
+				exit(1);
+			}
+
+			// process_vm_writev() would fail with EINVAL if the
+			// sum of the iov_len values overflows a ssize_t value
+			if (iov_len > SSIZE_MAX - total_len) { // written so the check itself cannot wrap
+				pr_err("Invalid iov_len value\n");
+				exit(1);
+			}
+
+			local_iovs[i].iov_len = iov_len;
+			remote_iovs[i].iov_len = iov_len;
+			total_len += iov_len;
+
+			local_iovs[i].iov_base = xmalloc(iov_len);
+			if (local_iovs[i].iov_base == NULL) {
+				exit(1);
+			}
+
+			ret = read(pipe_read_fd, &remote_iovs[i].iov_base, sizeof(void *));
+			if (ret != sizeof(void *)) {
+				pr_perror("Failed reading iov_base");
+				exit(1);
+			}
+		}
+
+		// Read encrypted data from pages image into local_iovs
+		preadv_ret = preadv(pages_img_fd, local_iovs, nr, offs);
+		if (preadv_ret != total_len) {
+			pr_perror("Failed reading iovs from image");
+			exit(1);
+		}
+
+		// Decrypt content of images
+		for (int i = 0; i < nr; i++) {
+			for (int j = 0; j < local_iovs[i].iov_len; j += PAGE_SIZE) {
+				if (tls_block_cipher_decrypt_data(local_iovs[i].iov_base + j, PAGE_SIZE)) {
+					pr_err("Failed to decrypt data\n");
+					exit(1);
+				}
+			}
+		}
+
+		// Write decrypted data to remote process address space
+		ret = process_vm_writev(remote_pid, local_iovs, nr, remote_iovs, nr, 0);
+		if (ret < 0) {
+			pr_perror("Failed writing iovs to remote process");
+			exit(1);
+		}
+
+		// Send preadv() return value to the restorer process so
+		// that it can return it to the caller
+		ret = write(pipe_write_fd, &preadv_ret, sizeof(ssize_t));
+		if (ret < 0) {
+			pr_perror("Failed writing ret");
+			exit(1);
+		}
+
+		// Cleanup local_iovs and remote_iovs
+		for (int i = 0; i < nr; i++) {
+			xfree(local_iovs[i].iov_base);
+		}
+		xfree(local_iovs);
+		xfree(remote_iovs);
+	}
+
+	exit(0);
+}
\ No newline at end of file
diff --git a/images/cipher.proto
b/images/cipher.proto index 801043bc51..c272139568 100644 --- a/images/cipher.proto +++ b/images/cipher.proto @@ -4,4 +4,6 @@ syntax = "proto2"; message cipher_entry { required bytes token = 1; + optional bytes aes_key = 2; + optional bytes aes_iv = 3; }