diff --git a/meson.build b/meson.build
index fa98e58d01..f3848b76a3 100644
--- a/meson.build
+++ b/meson.build
@@ -130,6 +130,7 @@ taisei_c_args = [
     '-fno-math-errno',
     '-fno-signaling-nans',
     '-fno-trapping-math',
+    '-mfp16-format=ieee',
 ]
 
 deprecation_warnings = get_option('deprecation_warnings')
@@ -313,6 +314,128 @@ config.set('TAISEI_BUILDCONF_HAVE_ATTR_MALLOC_WITH_ARGS', cc.compiles(
     args : ['-Wattributes', '-Werror']
 ))
 
+config.set('TAISEI_BUILDCONF_F16_CVT_TYPE', false)  # default: no native half type; replaced below if the compiler has one
+config.set('TAISEI_BUILDCONF_F16_STORAGE_TYPE', 'uint16_t')  # default storage type; replaced below by the detected native/ABI type
+config.set('TAISEI_BUILDCONF_F16_SIMD_TYPE', false)  # default: no vector-capable half type
+config.set('TAISEI_BUILDCONF_F16_RT_ABI_TYPE', false)  # default: no runtime-library conversion ABI
+config.set('TAISEI_BUILDCONF_F16_RT_FUNC_H2F', false)  # default: no runtime half -> float function
+config.set('TAISEI_BUILDCONF_F16_RT_FUNC_F2H', false)  # default: no runtime float -> half function
+
+float16_cvt_types = {  # candidate native conversion types, each mapped to the type used for storage
+    #      cvt : storage
+    '_Float16' : '_Float16',
+    '__fp16'   : 'uint16_t',
+}
+float16_simd_types = ['_Float16']  # types that also get TAISEI_BUILDCONF_F16_SIMD_TYPE
+float16_rt_abi = 'none'  # placeholder; reassigned from the external property before it is first read
+float16_rt_abi_typemap = {  # runtime-library ABI name -> C type used in the function prototypes
+    'native'  : '_Float16',
+    'integer' : 'uint16_t',
+}
+float16_rt_abi_choices = ['auto', 'none'] + float16_rt_abi_typemap.keys()  # accepted values of the float16_rt_abi property
+float16_rt_funcs = [  # candidate [half -> float, float -> half] symbol pairs, probed in this order
+    [ '__extendhfsf2', '__truncsfhf2' ],
+    [ '__gnu_h2f_ieee', '__gnu_f2h_ieee' ],
+]
+
+float16_rt_abi = meson.get_external_property('float16_rt_abi', 'auto')  # optional override; falls back to 'auto'
+if float16_rt_abi not in float16_rt_abi_choices  # reject anything outside the accepted set
+    error('float16_rt_abi must be one of @0@'.format(', '.join(float16_rt_abi_choices)))
+endif
+
+float16_have_native_conversion = false  # read by src/util/meson.build to decide whether the fallback is compiled
+float16_have_rtlib_conversion = false  # likewise; set by the runtime-library probe further down
+
+foreach cvt_type, storage_type : float16_cvt_types
+    if cc.sizeof(cvt_type) == 2  # only a type the compiler sizes at exactly 2 bytes qualifies
+        config.set('TAISEI_BUILDCONF_F16_CVT_TYPE', cvt_type)
+        config.set('TAISEI_BUILDCONF_F16_STORAGE_TYPE', storage_type)
+
+        if cvt_type in float16_simd_types
+            config.set('TAISEI_BUILDCONF_F16_SIMD_TYPE', cvt_type)
+        endif
+
+        float16_rt_abi = 'none'  # disables the runtime-library probe, overriding the external property
+        float16_have_native_conversion = true
+        break  # the first qualifying type is used
+    endif
+endforeach
+
+if float16_rt_abi != 'none'  # skipped when a native type was found or the property is 'none'
+    foreach funcs : float16_rt_funcs
+        func_h2f = funcs[0]
+        func_f2h = funcs[1]
+
+        if not cc.has_function(func_h2f) or not cc.has_function(func_f2h)
+            continue  # both functions of a pair must be available; otherwise try the next pair
+        endif
+
+        if float16_rt_abi == 'auto'
+            float16_rt_abi = 'none'  # stays 'none' unless one of the ABI probes below succeeds
+
+            foreach abiname, abitype : float16_rt_abi_typemap
+                r = cc.run(f'''
+                    #include <assert.h>
+                    #include <stdint.h>
+
+                    #define NOINLINE __attribute__((noinline))
+
+                    typedef @abitype@ f16_abi_t;
+                    float @func_h2f@(f16_abi_t);
+                    f16_abi_t @func_f2h@(float);
+
+                    NOINLINE float f16_to_f32(uint16_t x) {
+                        union {
+                            f16_abi_t _f16abi;
+                            uint16_t _uint;
+                        } u = { ._uint =  x };
+                        return @func_h2f@(u._f16abi);
+                    }
+
+                    NOINLINE uint16_t f32_to_f16(float  x) {
+                        union {
+                            f16_abi_t _f16abi;
+                            uint16_t _uint;
+                        } u = { ._f16abi =  @func_f2h@(x) };
+                        return u._uint;
+                    }
+
+                    int main(int argc, char **argv) {
+                        volatile float src = 420.69f;
+                        const float expected = 420.75f;
+                        volatile uint16_t half = f32_to_f16(src);
+                        volatile float roundtrip = f16_to_f32(half);
+                        assert(roundtrip == expected);
+                        return 0;
+                    }
+                ''', name : f'Test for @abiname@ float16 ABI')  # round-trips 420.69f through the pair and expects 420.75f
+
+                if r.compiled() and r.returncode() == 0
+                    float16_rt_abi = abiname  # the program built and its assertion held with this prototype type
+                    break
+                endif
+            endforeach
+        endif
+
+        if float16_rt_abi == 'none'
+            break  # NOTE(review): after a failed auto-detect, remaining function pairs are never probed; confirm this is intended
+        endif
+
+        abi_type = float16_rt_abi_typemap[float16_rt_abi]
+        config.set('TAISEI_BUILDCONF_F16_STORAGE_TYPE', abi_type)
+        config.set('TAISEI_BUILDCONF_F16_RT_ABI_TYPE', abi_type)
+        config.set('TAISEI_BUILDCONF_F16_RT_FUNC_H2F', func_h2f)
+        config.set('TAISEI_BUILDCONF_F16_RT_FUNC_F2H', func_f2h)
+
+        if abi_type in float16_simd_types
+            config.set('TAISEI_BUILDCONF_F16_SIMD_TYPE', abi_type)
+        endif
+
+        float16_have_rtlib_conversion = true
+        break  # the first available pair with a known ABI is used
+    endforeach
+endif
+
 prefer_relpath_systems = [
     'windows',
 ]
diff --git a/src/renderer/api.h b/src/renderer/api.h
index c77422efe5..e56e439e71 100644
--- a/src/renderer/api.h
+++ b/src/renderer/api.h
@@ -229,6 +229,7 @@ typedef enum Primitive {
 
 typedef enum VertexAttribType {
 	VA_FLOAT,
+	VA_HALF,
 	VA_BYTE,
 	VA_UBYTE,
 	VA_SHORT,
@@ -482,15 +483,10 @@ typedef struct SpriteParamsBuffer {
 typedef struct SpriteInstanceAttribs {
 	mat4 mv_transform;
 	mat4 tex_transform;
-
-	union {
-		FloatRect texrect;
-		vec4 texrect_vec4;
-	};
-
-	Color rgba;
-	FloatExtent sprite_size;
-	ShaderCustomParams custom;
+	FloatRect texrect;
+	float16_storage_t sprite_size[2];
+	float16_storage_t rgba[4];
+	float16_storage_t custom[4];
 
 	// offsetof(end_of_fields) == size without padding.
 	char end_of_fields;
diff --git a/src/renderer/common/sprite_batch.c b/src/renderer/common/sprite_batch.c
index 3827094777..e637353b85 100644
--- a/src/renderer/common/sprite_batch.c
+++ b/src/renderer/common/sprite_batch.c
@@ -64,7 +64,7 @@ void _r_sprite_batch_init(void) {
 	size_t sz_vert = sizeof(GenericModelVertex);
 	size_t sz_attr = SIZEOF_SPRITE_ATTRIBS;
 
-	#define VERTEX_OFS(attr)   offsetof(GenericModelVertex,  attr)
+	#define VERTEX_OFS(attr)   offsetof(GenericModelVertex,    attr)
 	#define INSTANCE_OFS(attr) offsetof(SpriteInstanceAttribs, attr)
 
 	VertexAttribFormat fmt[] = {
@@ -83,10 +83,10 @@ void _r_sprite_batch_init(void) {
 		{ { 4, VA_FLOAT, VA_CONVERT_FLOAT, 1 }, sz_attr, INSTANCE_OFS(tex_transform[1]), 1 },
 		{ { 4, VA_FLOAT, VA_CONVERT_FLOAT, 1 }, sz_attr, INSTANCE_OFS(tex_transform[2]), 1 },
 		{ { 4, VA_FLOAT, VA_CONVERT_FLOAT, 1 }, sz_attr, INSTANCE_OFS(tex_transform[3]), 1 },
-		{ { 4, VA_FLOAT, VA_CONVERT_FLOAT, 1 }, sz_attr, INSTANCE_OFS(rgba),             1 },
+		{ { 4, VA_HALF,  VA_CONVERT_FLOAT, 1 }, sz_attr, INSTANCE_OFS(rgba),             1 },
 		{ { 4, VA_FLOAT, VA_CONVERT_FLOAT, 1 }, sz_attr, INSTANCE_OFS(texrect),          1 },
-		{ { 2, VA_FLOAT, VA_CONVERT_FLOAT, 1 }, sz_attr, INSTANCE_OFS(sprite_size),      1 },
-		{ { 4, VA_FLOAT, VA_CONVERT_FLOAT, 1 }, sz_attr, INSTANCE_OFS(custom),           1 },
+		{ { 2, VA_HALF,  VA_CONVERT_FLOAT, 1 }, sz_attr, INSTANCE_OFS(sprite_size),      1 },
+		{ { 4, VA_HALF,  VA_CONVERT_FLOAT, 1 }, sz_attr, INSTANCE_OFS(custom),           1 },
 	};
 
 	#undef VERTEX_OFS
@@ -213,12 +213,9 @@ static void _r_sprite_batch_compute_attribs(
 		glm_translate(attribs.mv_transform, (vec3) { ofs.x / imgdims.w, ofs.y / imgdims.h });
 	}
 
-	if(params->color == NULL) {
-		// XXX: should we use r_color_current here?
-		attribs.rgba = *RGBA(1, 1, 1, 1);
-	} else {
-		attribs.rgba = *params->color;
-	}
+	// XXX: should we default to r_color_current here?
+	const Color *color = params->color ?: RGBA(1, 1, 1, 1);
+	f32v4_to_f16v4(attribs.rgba, color->rgba);
 
 	attribs.texrect = spr->tex_area;
 
@@ -232,12 +229,12 @@ static void _r_sprite_batch_compute_attribs(
 		attribs.texrect.h *= -1;
 	}
 
-	attribs.sprite_size = spr->extent;
+	f32v2_to_f16v2(attribs.sprite_size, spr->extent.as_array);
 
 	if(params->shader_params == NULL) {
-		memset(&attribs.custom, 0, sizeof(attribs.custom));
+		memset(attribs.custom, 0, sizeof(attribs.custom));
 	} else {
-		attribs.custom = *params->shader_params;
+		f32v4_to_f16v4(attribs.custom, params->shader_params->vector);
 	}
 
 	*out_attribs = attribs;
diff --git a/src/renderer/gl33/vertex_array.c b/src/renderer/gl33/vertex_array.c
index ad3397b7ac..75b5f288ed 100644
--- a/src/renderer/gl33/vertex_array.c
+++ b/src/renderer/gl33/vertex_array.c
@@ -16,6 +16,7 @@
 
 static GLenum va_type_to_gl_type[] = {
 	[VA_FLOAT]  = GL_FLOAT,
+	[VA_HALF]   = GL_HALF_FLOAT,
 	[VA_BYTE]   = GL_BYTE,
 	[VA_UBYTE]  = GL_UNSIGNED_BYTE,
 	[VA_SHORT]  = GL_SHORT,
diff --git a/src/resource/font.c b/src/resource/font.c
index cb1211c5e1..a6394ff7eb 100644
--- a/src/resource/font.c
+++ b/src/resource/font.c
@@ -1047,21 +1047,14 @@ static double _text_ucs4_draw(Font *font, const uint32_t *ucs4text, const TextPa
 
 	text_ucs4_bbox(font, ucs4text, 0, &bbox);
 
-	Color color;
-
-	if(params->color == NULL) {
-		// XXX: sprite batch code defaults this to RGB(1, 1, 1)
-		color = *r_color_current();
-	} else {
-		color = *params->color;
-	}
-
-	ShaderCustomParams shader_params;
+	SpriteInstanceAttribs init_attribs = {};
+	// XXX: sprite batch code defaults this to RGBA(1, 1, 1, 1)
+	f32v4_to_f16v4(init_attribs.rgba, (params->color ?: r_color_current())->rgba);
 
 	if(params->shader_params == NULL) {
-		memset(&shader_params, 0, sizeof(shader_params));
+		memset(init_attribs.custom, 0, sizeof(init_attribs.custom));
 	} else {
-		shader_params = *params->shader_params;
+		f32v4_to_f16v4(init_attribs.custom, params->shader_params->vector);
 	}
 
 	mat4 mat_texture;
@@ -1131,9 +1124,7 @@ static double _text_ucs4_draw(Font *font, const uint32_t *ucs4text, const TextPa
 			Sprite *spr = &glyph->sprite;
 			set_batch_texture(&batch_state_params, spr->tex);
 
-			SpriteInstanceAttribs attribs;
-			attribs.rgba = color;
-			attribs.custom = shader_params;
+			SpriteInstanceAttribs attribs = init_attribs;
 
 			float g_x = x + glyph->metrics.bearing_x + spr->w * 0.5;
 			float g_y = y - glyph->metrics.bearing_y + spr->h * 0.5 - font->metrics.descent;
@@ -1147,8 +1138,7 @@ static double _text_ucs4_draw(Font *font, const uint32_t *ucs4text, const TextPa
 			attribs.texrect = spr->tex_area;
 
 			// NOTE: Glyphs have their sprite w/h unadjusted for scale.
-			attribs.sprite_size.w = spr->w * iscale;
-			attribs.sprite_size.h = spr->h * iscale;
+			f32v2_to_f16v2(attribs.sprite_size, (float[2]) { spr->w * iscale, spr->h * iscale });
 
 			if(params->glyph_callback.func != NULL) {
 				params->glyph_callback.func(font, uchar, &attribs, params->glyph_callback.userdata);
diff --git a/src/stagedraw.c b/src/stagedraw.c
index 19aea1d3f8..1d6d872d37 100644
--- a/src/stagedraw.c
+++ b/src/stagedraw.c
@@ -1092,7 +1092,7 @@ static int draw_numeric_callback(Font *font, charcode_t charcode, SpriteInstance
 		st->color1 = st->color2;
 	}
 
-	spr_attribs->rgba = *st->color1;
+	f32v4_to_f16v4(spr_attribs->rgba, st->color1->rgba);
 	return 0;
 }
 
diff --git a/src/util.h b/src/util.h
index 1a04b05d97..4919f050c8 100644
--- a/src/util.h
+++ b/src/util.h
@@ -21,6 +21,7 @@
 #include "util/miscmath.h"
 // #include "util/pngcruft.h"
 #include "util/stringops.h"
+#include "util/float16.h"
 
 // FIXME: might not be the best place for these
 #include "log.h"
diff --git a/src/util/float16.h b/src/util/float16.h
new file mode 100644
index 0000000000..d1f7f610e6
--- /dev/null
+++ b/src/util/float16.h
@@ -0,0 +1,148 @@
+/*
+ * This software is licensed under the terms of the MIT License.
+ * See COPYING for further information.
+ * ---
+ * Copyright (c) 2011-2019, Lukas Weber <laochailan@web.de>.
+ * Copyright (c) 2012-2019, Andrei Alexeyev <akari@taisei-project.org>.
+*/
+
+#pragma once
+#include "taisei.h"
+
+/*
+ * NOTE: This is a storage-only format. You must not directly initialize it or perform math
+ * operations on it; go through f32_to_f16() / f16_to_f32() or the vector variants instead.
+ */
+typedef struct float16_storage {
+	TAISEI_BUILDCONF_F16_STORAGE_TYPE _storage;  // underlying type is selected by the build configuration
+} float16_storage_t;
+
+#if defined(TAISEI_BUILDCONF_F16_CVT_TYPE)
+
+// The compiler offers a dedicated half-precision type, so converting is a plain assignment.
+// That type is only touched through the punning union below, since it might not be possible
+// to return it from functions or pass it as an argument directly.
+
+typedef TAISEI_BUILDCONF_F16_CVT_TYPE float16_cvt_t;
+
+union f16_cvt {
+	float16_cvt_t as_cvt;
+	float16_storage_t as_storage;
+};
+
+attr_const INLINE float16_storage_t f32_to_f16(float x) {
+	assert(isfinite(x));
+	union f16_cvt pun = { .as_cvt = x };  // float -> half happens in this initialization
+	return pun.as_storage;
+}
+
+attr_const INLINE float f16_to_f32(float16_storage_t x) {
+	union f16_cvt pun = { .as_storage = x };
+	return pun.as_cvt;  // half -> float happens in this implicit conversion
+}
+
+#elif \
+	defined(TAISEI_BUILDCONF_F16_RT_ABI_TYPE) && \
+	defined(TAISEI_BUILDCONF_F16_RT_FUNC_F2H)	 && \
+	defined(TAISEI_BUILDCONF_F16_RT_FUNC_H2F)
+
+// No native half type is usable here, but the runtime library exports conversion routines.
+// float16_rtabi_t is the type those routines take and return, as selected by the build
+// configuration; values are moved in and out of it through the punning union below.
+
+typedef TAISEI_BUILDCONF_F16_RT_ABI_TYPE float16_rtabi_t;
+
+float TAISEI_BUILDCONF_F16_RT_FUNC_H2F(float16_rtabi_t);
+float16_rtabi_t TAISEI_BUILDCONF_F16_RT_FUNC_F2H(float);
+
+union f16_rtabi_cvt {
+	float16_rtabi_t as_rtabi;
+	float16_storage_t as_storage;
+};
+
+// Narrows a float (asserted finite) with the runtime library's float -> half routine.
+attr_const INLINE float16_storage_t f32_to_f16(float x) {
+	assert(isfinite(x));
+	union f16_rtabi_cvt pun = { .as_rtabi = TAISEI_BUILDCONF_F16_RT_FUNC_F2H(x) };
+	return pun.as_storage;
+}
+
+// Widens a stored half value with the runtime library's half -> float routine.
+attr_const INLINE float f16_to_f32(float16_storage_t x) {
+	union f16_rtabi_cvt pun = { .as_storage = x };
+	return TAISEI_BUILDCONF_F16_RT_FUNC_H2F(pun.as_rtabi);
+}
+
+#else
+
+// Neither native nor runtime-library conversion is configured: resort to vendored fallbacks (float16_fallback.c)
+
+float16_storage_t f32_to_f16(float x) attr_const;  // float -> half; the fallback asserts that x is finite
+float f16_to_f32(float16_storage_t x) attr_const;  // half -> float
+
+#endif
+
+// Vector operations
+
+#define F16_DEFINE_VECTOR_CONVERSION_SCALAR(vecsize) \
+	/* Portable path: narrows src[0..vecsize) to half precision, one element at a time. */ \
+	INLINE void f32v##vecsize##_to_f16v##vecsize(float16_storage_t dst[vecsize], const float src[vecsize]) { \
+		int lane = 0; \
+		while(lane < vecsize) { dst[lane] = f32_to_f16(src[lane]); ++lane; } \
+	} \
+	\
+	/* Portable path: widens src[0..vecsize) back to single precision, one element at a time. */ \
+	INLINE void f16v##vecsize##_to_f32v##vecsize(float dst[vecsize], const float16_storage_t src[vecsize]) { \
+		int lane = 0; \
+		while(lane < vecsize) { dst[lane] = f16_to_f32(src[lane]); ++lane; } \
+	}
+
+#ifdef TAISEI_BUILDCONF_F16_SIMD_TYPE
+
+typedef TAISEI_BUILDCONF_F16_SIMD_TYPE f16_simd_t;
+
+// NOTE: Sadly GCC 12 still can't vectorize this, but clang can.
+// Copies are sized by the arrays, not the vector types: clang pads a 3-lane vector to the size of 4 lanes.
+#define F16_DEFINE_VECTOR_CONVERSION(vecsize) \
+	typedef float      f32v##vecsize##simd __attribute__((vector_size(vecsize * sizeof(float)))); \
+	typedef f16_simd_t f16v##vecsize##simd __attribute__((vector_size(vecsize * sizeof(f16_simd_t)))); \
+	/* Narrows vecsize floats from src into half-precision storage at dst with one vector conversion. */ \
+	INLINE void f32v##vecsize##_to_f16v##vecsize(float16_storage_t dst[vecsize], const float src[vecsize]) { \
+		f32v##vecsize##simd v32_simd; \
+		memcpy(&v32_simd, src, sizeof(float[vecsize])); \
+		auto v16_simd = __builtin_convertvector(v32_simd, f16v##vecsize##simd); \
+		memcpy(dst, &v16_simd, sizeof(float16_storage_t[vecsize])); \
+	} \
+	/* Widens vecsize half-precision values from src into floats at dst with one vector conversion. */ \
+	INLINE void f16v##vecsize##_to_f32v##vecsize(float dst[vecsize], const float16_storage_t src[vecsize]) { \
+		f16v##vecsize##simd v16_simd; \
+		memcpy(&v16_simd, src, sizeof(float16_storage_t[vecsize])); \
+		auto v32_simd = __builtin_convertvector(v16_simd, f32v##vecsize##simd); \
+		memcpy(dst, &v32_simd, sizeof(float[vecsize])); \
+	}
+
+#else
+
+#define F16_DEFINE_VECTOR_CONVERSION(vecsize) \
+	F16_DEFINE_VECTOR_CONVERSION_SCALAR(vecsize)  // no SIMD-capable half type configured: reuse the scalar loops
+
+#endif
+
+/*
+ * Defines functions:
+ *
+ *		void f16vX_to_f32vX(float dst[X], const float16_storage_t src[X]);
+ *		void f32vX_to_f16vX(float16_storage_t dst[X], const float src[X]);
+ *
+ * Where X is the vector size.
+ */
+
+F16_DEFINE_VECTOR_CONVERSION(4)  // f32v4_to_f16v4 / f16v4_to_f32v4
+
+#ifdef __clang__
+	F16_DEFINE_VECTOR_CONVERSION(3)  // f32v3_to_f16v3 / f16v3_to_f32v3
+#else  // presumably because non-clang compilers reject 3-lane vector types; TODO confirm
+	F16_DEFINE_VECTOR_CONVERSION_SCALAR(3)  // same function names, always the scalar loops
+#endif
+
+F16_DEFINE_VECTOR_CONVERSION(2)  // f32v2_to_f16v2 / f16v2_to_f32v2
diff --git a/src/util/float16_fallback.c b/src/util/float16_fallback.c
new file mode 100644
index 0000000000..cc7e155725
--- /dev/null
+++ b/src/util/float16_fallback.c
@@ -0,0 +1,40 @@
+/*
+ * This software is licensed under the terms of the MIT License.
+ * See COPYING for further information.
+ * ---
+ * Copyright (c) 2011-2019, Lukas Weber <laochailan@web.de>.
+ * Copyright (c) 2012-2019, Andrei Alexeyev <akari@taisei-project.org>.
+*/
+
+#include "taisei.h"
+
+#include "float16.h"
+
+// Evil bit hackery stolen from stack overflow: https://stackoverflow.com/a/60047308
+
+float16_storage_t f32_to_f16(float x) {
+	assert(isfinite(x));
+	// IEEE-754 binary16: 1-5-10, exp-15, +-65504.0, +-6.1035156E-5, +-5.9604645E-8, 3.311 digits; larger magnitudes become +-infinity
+	uint32_t b = UNION_CAST(float, uint32_t, x) + 0x00001000; // round-to-nearest-even: add last bit after truncated mantissa
+	uint32_t e = (b & 0x7F800000) >> 23; // exponent
+	uint32_t m = (b & 0x007FFFFF); // mantissa; in line below: 0x007FF000 = 0x00800000-0x00001000 = decimal indicator flag - initial rounding
+	return UNION_CAST(uint16_t, float16_storage_t,
+		// sign : normalized : denormalized (shift count masked so an unselected term never shifts by >= 32) : overflow to infinity
+		(b & 0x80000000) >> 16 | ((e > 112) & (e < 143)) * ((((e - 112) << 10) & 0x7C00) | m >> 13) |
+		((e < 113) & (e > 101)) * ((((0x007FF000 + m) >> ((125 - e) & 31)) + 1) >> 1) |
+		(e > 142) * 0x7C00
+	);
+}
+
+float f16_to_f32(float16_storage_t f16) {
+	// IEEE-754 binary16: 1-5-10, exp-15, +-65504.0, +-6.1035156E-5, +-5.9604645E-8, 3.311 digits; exponent 31 decodes to infinity/NaN
+	uint32_t x = UNION_CAST(float16_storage_t, uint16_t, f16); // held in 32 bits so that moving the sign bit up cannot overflow int
+	uint32_t e = (x & 0x7C00) >> 10; // exponent
+	uint32_t m = (x & 0x03FF) << 13; // mantissa
+	uint32_t v = UNION_CAST(float, uint32_t, m) >> 23; // evil log2 bit hack to count leading zeros in denormalized format
+	return UNION_CAST(uint32_t, float,
+		// sign : normalized : infinity/NaN : denormalized (shift count masked so an unselected term never shifts by >= 32)
+		(x & 0x8000) << 16 | ((e != 0) & (e != 31)) * ((e + 112) << 23 | m) | (e == 31) * (0x7F800000 | m) |
+		((e == 0) & (m != 0)) * ((v - 37) << 23 | ((m << ((150 - v) & 31)) & 0x007FE000))
+	);
+}
diff --git a/src/util/meson.build b/src/util/meson.build
index 6fe8aa53c9..df0fd3bb0f 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -43,3 +43,7 @@ if dep_gamemode.found()
 else
     util_src += files('gamemode_stub.c')
 endif
+
+if not (float16_have_native_conversion or float16_have_rtlib_conversion)
+    util_src += files('float16_fallback.c')  # software conversion, built only when both probes in the top-level meson.build failed
+endif