Skip to content

Commit

Permalink
Use ESP32-S3 vector extensions for LUT processing and diffing (#281)
Browse files Browse the repository at this point in the history
Adds optimized versions for 1bpp difference LUT lookup, high-level framebuffer diffing, and output line masking.
Combined with 120MHz PSRAM (activate in the experimental options), we now get sub-second updates for a 1872x1404 display using epdiy V7.
  • Loading branch information
vroland authored Apr 22, 2024
1 parent 2803a55 commit 29b2b19
Show file tree
Hide file tree
Showing 34 changed files with 1,883 additions and 655 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ set(app_sources "src/epdiy.c"
"src/output_i2s/rmt_pulse.c"
"src/output_i2s/i2s_data_bus.c"
"src/output_common/lut.c"
"src/output_common/lut.S"
"src/output_common/line_queue.c"
"src/output_common/render_context.c"
"src/font.c"
"src/displays.c"
"src/diff.S"
"src/board_specific.c"
"src/builtin_waveforms.c"
"src/highlevel.c"
Expand Down
1 change: 1 addition & 0 deletions examples/demo/main/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Source files of the demo application's main component.
set(app_sources "main.c")

# Have CMake write compile_commands.json for editor / language-server tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Register the main component; it requires the epdiy component.
idf_component_register(SRCS ${app_sources} REQUIRES epdiy)
5 changes: 5 additions & 0 deletions examples/demo/main/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ void idf_setup() {
epd_rotated_display_height()
);

// The display bus settings for V7 may be conservative. You can manually
// override the bus speed to tune for performance, e.g., if you set the PSRAM
// speed to 120MHz.
// epd_set_lcd_pixel_clock_MHz(17);

heap_caps_print_heap_info(MALLOC_CAP_INTERNAL);
heap_caps_print_heap_info(MALLOC_CAP_SPIRAM);
}
Expand Down
11 changes: 11 additions & 0 deletions examples/test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Project-level CMakeLists.txt for the test subproject (the epdiy test runner).
cmake_minimum_required(VERSION 3.16)

# Directories searched for additional components.
# Add newly added components here if they are compatible with IDF >= v4.3.
# "../../" is the directory two levels above this project.
set(EXTRA_COMPONENT_DIRS "../../")

# Components under test.
# NOTE(review): TEST_COMPONENTS is interpreted by the ESP-IDF build system;
# see the ESP-IDF unit testing documentation for its exact semantics.
set(TEST_COMPONENTS "epdiy")

# The IDF_PATH environment variable must point to the ESP-IDF checkout.
include($ENV{IDF_PATH}/tools/cmake/project.cmake)
project(epdiy_testrunner)
3 changes: 3 additions & 0 deletions examples/test/main/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Have CMake write compile_commands.json for editor / language-server tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Main component of the test runner: a single source file, with this
# directory on the include path.
idf_component_register(SRCS "main.c" INCLUDE_DIRS ".")
19 changes: 19 additions & 0 deletions examples/test/main/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include <stdio.h>
#include <unity.h>
#include "unity_test_runner.h"

/**
 * Print a banner line to stdout.
 *
 * The text is framed by '#' markers, preceded by one blank line and
 * followed by one blank line.
 *
 * @param text NUL-terminated string to show inside the banner.
 */
static void print_banner(const char* text) {
    printf("\n#### %s #####\n\n", text);
}


/**
 * Application entry point of the test runner.
 *
 * Prints a banner, then runs every registered Unity test between
 * UNITY_BEGIN() and UNITY_END().
 */
void app_main(void) {
    print_banner("Running all the registered tests");

    UNITY_BEGIN();
    // Alternative, currently disabled: run only the tests tagged "unit".
    // unity_run_tests_by_tag("unit", false);
    unity_run_all_tests();
    UNITY_END();
}

2 changes: 1 addition & 1 deletion src/board/epd_board.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ void epd_set_board(const EpdBoardDefinition *board_definition) {
if (epd_board == NULL) {
epd_board = board_definition;
} else {
ESP_LOGE("epdiy", "EPD board can only be set once!");
ESP_LOGW("epdiy", "EPD board can only be set once!");
}
}

Expand Down
6 changes: 5 additions & 1 deletion src/board/epd_board_v6.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include "driver/gpio.h"
#include "epd_board.h"

#include <esp_log.h>
Expand Down Expand Up @@ -144,8 +145,11 @@ static void epd_board_deinit() {
vTaskDelay(500);
pca9555_read_input(config_reg.port, 0);
pca9555_read_input(config_reg.port, 1);
ESP_LOGI("epdiy", "going to sleep.");
i2c_driver_delete(EPDIY_I2C_PORT);
gpio_isr_handler_remove(CFG_INTR);
gpio_uninstall_isr_service();
gpio_reset_pin(CFG_INTR);
gpio_reset_pin(V4_LATCH_ENABLE);
}

static void epd_board_set_ctrl(epd_ctrl_state_t *state, const epd_ctrl_state_t * const mask) {
Expand Down
45 changes: 23 additions & 22 deletions src/board/epd_board_v7.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,22 +92,22 @@ static lcd_bus_config_t lcd_config = {
.leh = LEH,
.start_pulse = STH,
.stv = STV,
.data_0 = D0,
.data_1 = D1,
.data_2 = D2,
.data_3 = D3,
.data_4 = D4,
.data_5 = D5,
.data_6 = D6,
.data_7 = D7,
.data_8 = D8,
.data_9 = D9,
.data_10 = D10,
.data_11 = D11,
.data_12 = D12,
.data_13 = D13,
.data_14 = D14,
.data_15 = D15,
.data[0] = D0,
.data[1] = D1,
.data[2] = D2,
.data[3] = D3,
.data[4] = D4,
.data[5] = D5,
.data[6] = D6,
.data[7] = D7,
.data[8] = D8,
.data[9] = D9,
.data[10] = D10,
.data[11] = D11,
.data[12] = D12,
.data[13] = D13,
.data[14] = D14,
.data[15] = D15,
};

static void epd_board_init(uint32_t epd_row_width) {
Expand Down Expand Up @@ -158,24 +158,27 @@ static void epd_board_init(uint32_t epd_row_width) {

static void epd_board_deinit() {

epd_lcd_deinit();

ESP_ERROR_CHECK(pca9555_set_config(config_reg.port, CFG_PIN_PWRGOOD | CFG_PIN_INT | CFG_PIN_VCOM_CTRL | CFG_PIN_PWRUP, 1));

int tries = 0;
while (!((pca9555_read_input(config_reg.port, 1) & 0xC0) == 0x80)) {
if (tries >= 500) {
if (tries >= 50) {
ESP_LOGE("epdiy", "failed to shut down TPS65185!");
break;
}
tries++;
vTaskDelay(1);
printf("%X\n", pca9555_read_input(config_reg.port, 1));
}

// Not sure why we need this delay, but the TPS65185 seems to generate an interrupt after some time that needs to be cleared.
vTaskDelay(500);
vTaskDelay(50);
pca9555_read_input(config_reg.port, 0);
pca9555_read_input(config_reg.port, 1);
ESP_LOGI("epdiy", "going to sleep.");
i2c_driver_delete(EPDIY_I2C_PORT);

gpio_uninstall_isr_service();
}

static void epd_board_set_ctrl(epd_ctrl_state_t *state, const epd_ctrl_state_t * const mask) {
Expand Down Expand Up @@ -214,8 +217,6 @@ static void epd_board_poweron(epd_ctrl_state_t *state) {
while (!(pca9555_read_input(config_reg.port, 1) & CFG_PIN_PWRGOOD)) {
}

printf("PG is up\n");

ESP_ERROR_CHECK(tps_write_register(config_reg.port, TPS_REG_ENABLE, 0x3F));

tps_set_vcom(config_reg.port, vcom);
Expand Down
9 changes: 9 additions & 0 deletions src/board/pca9555.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ static esp_err_t i2c_master_read_slave(i2c_port_t i2c_num, uint8_t* data_rd, siz
return ESP_OK;
}
i2c_cmd_handle_t cmd = i2c_cmd_link_create();
if (cmd == NULL) {
ESP_LOGE("epdiy", "insufficient memory for I2C transaction");
}
i2c_master_start(cmd);
i2c_master_write_byte(cmd, ( EPDIY_PCA9555_ADDR << 1 ) | I2C_MASTER_WRITE, true);
i2c_master_write_byte(cmd, reg, true);
Expand All @@ -37,6 +40,9 @@ static esp_err_t i2c_master_read_slave(i2c_port_t i2c_num, uint8_t* data_rd, siz
i2c_cmd_link_delete(cmd);

cmd = i2c_cmd_link_create();
if (cmd == NULL) {
ESP_LOGE("epdiy", "insufficient memory for I2C transaction");
}
i2c_master_start(cmd);
i2c_master_write_byte(cmd, ( EPDIY_PCA9555_ADDR << 1 ) | I2C_MASTER_READ, true);
if (size > 1) {
Expand All @@ -57,6 +63,9 @@ static esp_err_t i2c_master_read_slave(i2c_port_t i2c_num, uint8_t* data_rd, siz
static esp_err_t i2c_master_write_slave(i2c_port_t i2c_num, uint8_t ctrl, uint8_t* data_wr, size_t size)
{
i2c_cmd_handle_t cmd = i2c_cmd_link_create();
if (cmd == NULL) {
ESP_LOGE("epdiy", "insufficient memory for I2C transaction");
}
i2c_master_start(cmd);
i2c_master_write_byte(cmd, ( EPDIY_PCA9555_ADDR << 1 ) | I2C_MASTER_WRITE, true);
i2c_master_write_byte(cmd, ctrl, true);
Expand Down
159 changes: 159 additions & 0 deletions src/diff.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
#include <xtensa/config/core-isa.h>
#include <xtensa/config/core-matmap.h>
#include "output_common/render_method.h"

#ifdef RENDER_METHOD_LCD

.text
.align 4
.global epd_interlace_4bpp_line_VE
.type epd_interlace_4bpp_line_VE,@function

// // CRASH AND BURN for debugging
// EE.MOVI.32.A q3, a2, 0
// EE.MOVI.32.A q3, a3, 1
// EE.MOVI.32.A q3, a4, 2
// EE.MOVI.32.A q3, a5, 3
// l8ui a10, a10, 0

// bool epd_interlace_4bpp_line_VE(
//     const uint8_t *to,
//     const uint8_t *from,
//     uint8_t *interlaced,
//     uint8_t *col_dirtyness,
//     int fb_width
// )
//
// Diffs two framebuffer lines and merges them into one interlaced line.
// The argument order above follows the register usage in the code below
// (a4 receives the interlaced output, a5 is the column dirtyness buffer).
//
// Per loop iteration, 16 bytes of "to" and 16 bytes of "from" are consumed:
//   - to XOR from is ORed into the 16 bytes at col_dirtyness and into a
//     per-line accumulator (q5).
//   - 32 bytes are written to "interlaced". Every output byte carries one
//     nibble of "to" in its upper half and the nibble of "from" from the
//     same position in its lower half.
//
// Returns in a2 the OR of the four 32-bit lanes of the line accumulator,
// which is zero if no processed byte differs between "to" and "from".
//
// The loop runs fb_width / 32 times; a remainder of fb_width is not processed.
//
// NOTE(review): the return value is a raw 32-bit OR and is not normalised
// to 0/1. Confirm that the C prototype and its callers handle this.
// NOTE(review): presumably all buffers must be 16-byte aligned for the
// 128-bit vector loads and stores. Confirm against the ESP32-S3 TRM.
epd_interlace_4bpp_line_VE:
// Register assignment on entry:
// to - a2
// from - a3
// interlaced - a4
// col_dirtyness - a5
// fb_width - a6

entry a1, 32

// divide by 32 for loop count (one iteration per 16 input bytes)
srli a11, a6, 5

// q6: mask selecting the upper nibble of every byte
movi.n a10, 0xF0F0F0F0;
EE.MOVI.32.Q q6,a10,0
EE.MOVI.32.Q q6,a10,1
EE.MOVI.32.Q q6,a10,2
EE.MOVI.32.Q q6,a10,3

// q7: mask selecting the lower nibble of every byte
movi.n a10, 0x0F0F0F0F
EE.MOVI.32.Q q7,a10,0
EE.MOVI.32.Q q7,a10,1
EE.MOVI.32.Q q7,a10,2
EE.MOVI.32.Q q7,a10,3

// put 4 into the shift amount register (one nibble),
// used by the vector shifts in the loop
movi.n a10, 4
WSR.SAR a10

// line "dirtyness" accumulator, starts out clean
EE.ZERO.Q q5

// Instructions sometimes are in an unexpected order
// for best pipeline utilization
loopnez a11, .loop_end_difference

// q0 = next 16 bytes of "to", q1 = next 16 bytes of "from"
// (both pointers advance by 16)
EE.VLD.128.IP q0, a2, 16
EE.VLD.128.IP q1, a3, 16

// load column dirtyness (pointer is advanced by the store below)
EE.VLD.128.IP q3, a5, 0

// q4 = bits that differ between "from" and "to"
EE.XORQ q4, q1, q0

// line dirtyness accumulator
EE.ORQ q5, q5, q4
// column dirtyness
EE.ORQ q3, q3, q4

// store column dirtyness and advance to the next 16 columns bytes
EE.VST.128.IP q3, a5, 16

// mask out every second value:
// q2 = lower nibbles of "to",   q0 = upper nibbles of "to"
// q3 = lower nibbles of "from", q1 = upper nibbles of "from"
EE.ANDQ q2, q0, q7
EE.ANDQ q0, q0, q6
EE.ANDQ q3, q1, q7
EE.ANDQ q1, q1, q6

// shift vectors to align:
// lower "to" nibbles move up, upper "from" nibbles move down
EE.VSL.32 q2, q2
EE.VSR.32 q1, q1

// the right shift sign-extends,
// so we make sure the resulting shift is logical by masking again
EE.ANDQ q1, q1, q7

// Combine "from" and "to" nibble:
// q2 = (lower "to" nibble << 4) | lower "from" nibble
// q0 = upper "to" nibble | (upper "from" nibble >> 4)
EE.ORQ q2, q2, q3
EE.ORQ q0, q0, q1

// Zip masked out values together (byte-wise interleave of q2 and q0)
EE.VZIP.8 q2, q0

// store interlaced buffer data: 32 output bytes per 16 input bytes
EE.VST.128.IP q2, a4, 16
EE.VST.128.IP q0, a4, 16

.loop_end_difference:

// Fold the 128-bit line accumulator into the return register a2.
// The argument registers are no longer needed at this point.
EE.MOVI.32.A q5, a2, 0
EE.MOVI.32.A q5, a3, 1
EE.MOVI.32.A q5, a4, 2
EE.MOVI.32.A q5, a5, 3
or a2, a2, a3
or a2, a2, a4
or a2, a2, a5

// Disabled: unconditionally report the line as dirty.
//movi.n a2, 1 // return "true"

// CRASH AND BURN for debugging:
// dump the accumulator into a2..a5, then fault by loading from address 0.
//EE.MOVI.32.A q5, a2, 0
//EE.MOVI.32.A q5, a3, 1
//EE.MOVI.32.A q5, a4, 2
//EE.MOVI.32.A q5, a5, 3
//movi.n a10, 0
//l8ui a10, a10, 0

retw.n


.global epd_apply_line_mask_VE
.type epd_apply_line_mask_VE,@function

// void epd_apply_line_mask_VE(
//     uint8_t *line,
//     const uint8_t *mask,
//     int mask_len
// )
//
// ANDs "mask" into "line" in place, 16 bytes per loop iteration.
// The loop runs mask_len / 16 times; a remainder of mask_len is not processed.
//
// NOTE(review): presumably both buffers must be 16-byte aligned for the
// 128-bit vector loads and stores. Confirm against the ESP32-S3 TRM.
epd_apply_line_mask_VE:
// Register assignment on entry:
// line - a2
// mask - a3
// mask_len - a4

entry a1, 32

// divide by 16 for loop count (one iteration per 16-byte vector)
srli a4, a4, 4

// Instructions sometimes are in an unexpected order
// for best pipeline utilization
loopnez a4, .loop_end_mask

// q0 = 16 bytes of line (pointer is advanced by the store below),
// q1 = 16 bytes of mask (pointer advances by 16)
EE.VLD.128.IP q0, a2, 0
EE.VLD.128.IP q1, a3, 16

// keep only the line bits that are set in the mask
EE.ANDQ q0, q0, q1

// write the masked bytes back and advance the line pointer
EE.VST.128.IP q0, a2, 16

.loop_end_mask:

retw.n

#endif
2 changes: 2 additions & 0 deletions src/epd_highlevel.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ typedef struct {
uint8_t* difference_fb;
/// Tainted lines based on the last difference calculation.
bool* dirty_lines;
/// Tainted column nibbles based on the last difference calculation.
uint8_t* dirty_columns;
/// The waveform information to use.
const EpdWaveform* waveform;
} EpdiyHighlevelState;
Expand Down
2 changes: 1 addition & 1 deletion src/epdiy.c
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ enum EpdDrawError epd_draw_image(EpdRect area, const uint8_t *data, const EpdWav
.width = 0,
.height = 0,
};
return epd_draw_base(area, data, no_crop, EPD_MODE_DEFAULT, temperature, NULL, waveform);
return epd_draw_base(area, data, no_crop, EPD_MODE_DEFAULT, temperature, NULL, NULL, waveform);
}

void epd_set_rotation(enum EpdRotation rotation) {
Expand Down
Loading

0 comments on commit 29b2b19

Please sign in to comment.