diff --git a/CMakeLists.txt b/CMakeLists.txt index 981daa38..053177ae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,10 +7,12 @@ set(app_sources "src/epdiy.c" "src/output_i2s/rmt_pulse.c" "src/output_i2s/i2s_data_bus.c" "src/output_common/lut.c" + "src/output_common/lut.S" "src/output_common/line_queue.c" "src/output_common/render_context.c" "src/font.c" "src/displays.c" + "src/diff.S" "src/board_specific.c" "src/builtin_waveforms.c" "src/highlevel.c" diff --git a/examples/demo/main/CMakeLists.txt b/examples/demo/main/CMakeLists.txt index 475be06b..a107619b 100644 --- a/examples/demo/main/CMakeLists.txt +++ b/examples/demo/main/CMakeLists.txt @@ -1,3 +1,4 @@ set(app_sources "main.c") +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) idf_component_register(SRCS ${app_sources} REQUIRES epdiy) diff --git a/examples/demo/main/main.c b/examples/demo/main/main.c index a6c59101..6eccb1c5 100644 --- a/examples/demo/main/main.c +++ b/examples/demo/main/main.c @@ -54,6 +54,11 @@ void idf_setup() { epd_rotated_display_height() ); + // The display bus settings for V7 may be conservative, you can manually + // override the bus speed to tune for speed, i.e., if you set the PSRAM speed + // to 120MHz. + // epd_set_lcd_pixel_clock_MHz(17); + heap_caps_print_heap_info(MALLOC_CAP_INTERNAL); heap_caps_print_heap_info(MALLOC_CAP_SPIRAM); } diff --git a/examples/test/CMakeLists.txt b/examples/test/CMakeLists.txt new file mode 100644 index 00000000..849a8202 --- /dev/null +++ b/examples/test/CMakeLists.txt @@ -0,0 +1,11 @@ +# This is the project CMakeLists.txt file for the test subproject +cmake_minimum_required(VERSION 3.16) + +# Add newly added components to one of these lines: +# 1. 
Add here if the component is compatible with IDF >= v4.3 +set(EXTRA_COMPONENT_DIRS "../../") + +set(TEST_COMPONENTS "epdiy") + +include($ENV{IDF_PATH}/tools/cmake/project.cmake) +project(epdiy_testrunner) diff --git a/examples/test/main/CMakeLists.txt b/examples/test/main/CMakeLists.txt new file mode 100644 index 00000000..e727913e --- /dev/null +++ b/examples/test/main/CMakeLists.txt @@ -0,0 +1,3 @@ +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +idf_component_register(SRCS "main.c" INCLUDE_DIRS ".") diff --git a/examples/test/main/main.c b/examples/test/main/main.c new file mode 100644 index 00000000..ec272d72 --- /dev/null +++ b/examples/test/main/main.c @@ -0,0 +1,19 @@ +#include +#include +#include "unity_test_runner.h" + +static void print_banner(const char* text) +{ + printf("\n#### %s #####\n\n", text); +} + + +void app_main(void) +{ + print_banner("Running all the registered tests"); + UNITY_BEGIN(); + //unity_run_tests_by_tag("unit", false); + unity_run_all_tests(); + UNITY_END(); +} + diff --git a/src/board/epd_board.c b/src/board/epd_board.c index 8ccaa460..d1fa8b1e 100644 --- a/src/board/epd_board.c +++ b/src/board/epd_board.c @@ -25,7 +25,7 @@ void epd_set_board(const EpdBoardDefinition *board_definition) { if (epd_board == NULL) { epd_board = board_definition; } else { - ESP_LOGE("epdiy", "EPD board can only be set once!"); + ESP_LOGW("epdiy", "EPD board can only be set once!"); } } diff --git a/src/board/epd_board_v6.c b/src/board/epd_board_v6.c index 643c788a..39789228 100644 --- a/src/board/epd_board_v6.c +++ b/src/board/epd_board_v6.c @@ -1,3 +1,4 @@ +#include "driver/gpio.h" #include "epd_board.h" #include @@ -144,8 +145,11 @@ static void epd_board_deinit() { vTaskDelay(500); pca9555_read_input(config_reg.port, 0); pca9555_read_input(config_reg.port, 1); - ESP_LOGI("epdiy", "going to sleep."); i2c_driver_delete(EPDIY_I2C_PORT); + gpio_isr_handler_remove(CFG_INTR); + gpio_uninstall_isr_service(); + gpio_reset_pin(CFG_INTR); + 
gpio_reset_pin(V4_LATCH_ENABLE); } static void epd_board_set_ctrl(epd_ctrl_state_t *state, const epd_ctrl_state_t * const mask) { diff --git a/src/board/epd_board_v7.c b/src/board/epd_board_v7.c index 8dee357c..f878f41d 100644 --- a/src/board/epd_board_v7.c +++ b/src/board/epd_board_v7.c @@ -92,22 +92,22 @@ static lcd_bus_config_t lcd_config = { .leh = LEH, .start_pulse = STH, .stv = STV, - .data_0 = D0, - .data_1 = D1, - .data_2 = D2, - .data_3 = D3, - .data_4 = D4, - .data_5 = D5, - .data_6 = D6, - .data_7 = D7, - .data_8 = D8, - .data_9 = D9, - .data_10 = D10, - .data_11 = D11, - .data_12 = D12, - .data_13 = D13, - .data_14 = D14, - .data_15 = D15, + .data[0] = D0, + .data[1] = D1, + .data[2] = D2, + .data[3] = D3, + .data[4] = D4, + .data[5] = D5, + .data[6] = D6, + .data[7] = D7, + .data[8] = D8, + .data[9] = D9, + .data[10] = D10, + .data[11] = D11, + .data[12] = D12, + .data[13] = D13, + .data[14] = D14, + .data[15] = D15, }; static void epd_board_init(uint32_t epd_row_width) { @@ -158,24 +158,27 @@ static void epd_board_init(uint32_t epd_row_width) { static void epd_board_deinit() { + epd_lcd_deinit(); + ESP_ERROR_CHECK(pca9555_set_config(config_reg.port, CFG_PIN_PWRGOOD | CFG_PIN_INT | CFG_PIN_VCOM_CTRL | CFG_PIN_PWRUP, 1)); int tries = 0; while (!((pca9555_read_input(config_reg.port, 1) & 0xC0) == 0x80)) { - if (tries >= 500) { + if (tries >= 50) { ESP_LOGE("epdiy", "failed to shut down TPS65185!"); break; } tries++; vTaskDelay(1); - printf("%X\n", pca9555_read_input(config_reg.port, 1)); } + // Not sure why we need this delay, but the TPS65185 seems to generate an interrupt after some time that needs to be cleared. 
- vTaskDelay(500); + vTaskDelay(50); pca9555_read_input(config_reg.port, 0); pca9555_read_input(config_reg.port, 1); - ESP_LOGI("epdiy", "going to sleep."); i2c_driver_delete(EPDIY_I2C_PORT); + + gpio_uninstall_isr_service(); } static void epd_board_set_ctrl(epd_ctrl_state_t *state, const epd_ctrl_state_t * const mask) { @@ -214,8 +217,6 @@ static void epd_board_poweron(epd_ctrl_state_t *state) { while (!(pca9555_read_input(config_reg.port, 1) & CFG_PIN_PWRGOOD)) { } - printf("PG is up\n"); - ESP_ERROR_CHECK(tps_write_register(config_reg.port, TPS_REG_ENABLE, 0x3F)); tps_set_vcom(config_reg.port, vcom); diff --git a/src/board/pca9555.c b/src/board/pca9555.c index f769e955..c71260f1 100644 --- a/src/board/pca9555.c +++ b/src/board/pca9555.c @@ -25,6 +25,9 @@ static esp_err_t i2c_master_read_slave(i2c_port_t i2c_num, uint8_t* data_rd, siz return ESP_OK; } i2c_cmd_handle_t cmd = i2c_cmd_link_create(); + if (cmd == NULL) { + ESP_LOGE("epdiy", "insufficient memory for I2C transaction"); + } i2c_master_start(cmd); i2c_master_write_byte(cmd, ( EPDIY_PCA9555_ADDR << 1 ) | I2C_MASTER_WRITE, true); i2c_master_write_byte(cmd, reg, true); @@ -37,6 +40,9 @@ static esp_err_t i2c_master_read_slave(i2c_port_t i2c_num, uint8_t* data_rd, siz i2c_cmd_link_delete(cmd); cmd = i2c_cmd_link_create(); + if (cmd == NULL) { + ESP_LOGE("epdiy", "insufficient memory for I2C transaction"); + } i2c_master_start(cmd); i2c_master_write_byte(cmd, ( EPDIY_PCA9555_ADDR << 1 ) | I2C_MASTER_READ, true); if (size > 1) { @@ -57,6 +63,9 @@ static esp_err_t i2c_master_read_slave(i2c_port_t i2c_num, uint8_t* data_rd, siz static esp_err_t i2c_master_write_slave(i2c_port_t i2c_num, uint8_t ctrl, uint8_t* data_wr, size_t size) { i2c_cmd_handle_t cmd = i2c_cmd_link_create(); + if (cmd == NULL) { + ESP_LOGE("epdiy", "insufficient memory for I2C transaction"); + } i2c_master_start(cmd); i2c_master_write_byte(cmd, ( EPDIY_PCA9555_ADDR << 1 ) | I2C_MASTER_WRITE, true); i2c_master_write_byte(cmd, ctrl, true); diff 
--git a/src/diff.S b/src/diff.S new file mode 100644 index 00000000..ab71794e --- /dev/null +++ b/src/diff.S @@ -0,0 +1,159 @@ +#include +#include +#include "output_common/render_method.h" + +#ifdef RENDER_METHOD_LCD + +.text +.align 4 +.global epd_interlace_4bpp_line_VE +.type epd_interlace_4bpp_line_VE,@function + +// // CRASH AND BURN for debugging +// EE.MOVI.32.A q3, a2, 0 +// EE.MOVI.32.A q3, a3, 1 +// EE.MOVI.32.A q3, a4, 2 +// EE.MOVI.32.A q3, a5, 3 +// l8ui a10, a10, 0 + +// bool epd_interlace_4bpp_line_VE( +// const uint8_t *to, +// const uint8_t *from, +// uint8_t *interlaced, +// uint8_t *col_dirtyness, +// int fb_width +// ) +epd_interlace_4bpp_line_VE: +// to - a2 +// from - a3 +// interlaced - a4 +// col_dirtyness - a5 +// fb_width - a6 + + entry a1, 32 + + // divide by 32 for loop count + srli a11, a6, 5 + + movi.n a10, 0xF0F0F0F0; + EE.MOVI.32.Q q6,a10,0 + EE.MOVI.32.Q q6,a10,1 + EE.MOVI.32.Q q6,a10,2 + EE.MOVI.32.Q q6,a10,3 + + movi.n a10, 0x0F0F0F0F + EE.MOVI.32.Q q7,a10,0 + EE.MOVI.32.Q q7,a10,1 + EE.MOVI.32.Q q7,a10,2 + EE.MOVI.32.Q q7,a10,3 + + // put 4 into shift amount + movi.n a10, 4 + WSR.SAR a10 + + // "dirtyness" register + EE.ZERO.Q q5 + + // Instructions sometimes are in an unexpected order + // for best pipeline utilization + loopnez a11, .loop_end_difference + + EE.VLD.128.IP q0, a2, 16 + EE.VLD.128.IP q1, a3, 16 + + // load column dirtyness + EE.VLD.128.IP q3, a5, 0 + + // update dirtyness + EE.XORQ q4, q1, q0 + + // line dirtyness accumulator + EE.ORQ q5, q5, q4 + // column dirtyness + EE.ORQ q3, q3, q4 + + // store column dirtyness + EE.VST.128.IP q3, a5, 16 + + // mask out every second value + EE.ANDQ q2, q0, q7 + EE.ANDQ q0, q0, q6 + EE.ANDQ q3, q1, q7 + EE.ANDQ q1, q1, q6 + + // shift vectors to align + EE.VSL.32 q2, q2 + EE.VSR.32 q1, q1 + + // the right shift sign-extends, + // so we make sure the resulting shift is logical by masking again + EE.ANDQ q1, q1, q7 + + // Combine "from" and "to" nibble + EE.ORQ q2, q2, q3 + EE.ORQ q0, q0, q1 
+ + // Zip masked out values together + EE.VZIP.8 q2, q0 + + // store interlaced buffer data + EE.VST.128.IP q2, a4, 16 + EE.VST.128.IP q0, a4, 16 + +.loop_end_difference: + + EE.MOVI.32.A q5, a2, 0 + EE.MOVI.32.A q5, a3, 1 + EE.MOVI.32.A q5, a4, 2 + EE.MOVI.32.A q5, a5, 3 + or a2, a2, a3 + or a2, a2, a4 + or a2, a2, a5 + + //movi.n a2, 1 // return "true" + + // CRASH AND BURN for debugging + //EE.MOVI.32.A q5, a2, 0 + //EE.MOVI.32.A q5, a3, 1 + //EE.MOVI.32.A q5, a4, 2 + //EE.MOVI.32.A q5, a5, 3 + //movi.n a10, 0 + //l8ui a10, a10, 0 + + retw.n + + +.global epd_apply_line_mask_VE +.type epd_apply_line_mask_VE,@function + +// void epd_apply_line_mask_VE( +// uint8_t *line, +// const uint8_t *mask, +// int mask_len +// ) +epd_apply_line_mask_VE: +// line - a2 +// mask - a3 +// mask_len - a4 + + entry a1, 32 + + // divide by 16 for loop count + srli a4, a4, 4 + + // Instructions sometimes are in an unexpected order + // for best pipeline utilization + loopnez a4, .loop_end_mask + + EE.VLD.128.IP q0, a2, 0 + EE.VLD.128.IP q1, a3, 16 + + EE.ANDQ q0, q0, q1 + + EE.VST.128.IP q0, a2, 16 + +.loop_end_mask: + + retw.n + +#endif \ No newline at end of file diff --git a/src/epd_highlevel.h b/src/epd_highlevel.h index 12469205..1a867c59 100644 --- a/src/epd_highlevel.h +++ b/src/epd_highlevel.h @@ -78,6 +78,8 @@ typedef struct { uint8_t* difference_fb; /// Tainted lines based on the last difference calculation. bool* dirty_lines; + /// Tainted column nibbles based on the last difference calculation. + uint8_t* dirty_columns; /// The waveform information to use. 
const EpdWaveform* waveform; } EpdiyHighlevelState; diff --git a/src/epdiy.c b/src/epdiy.c index d35c2ed7..a90bd64d 100644 --- a/src/epdiy.c +++ b/src/epdiy.c @@ -382,7 +382,7 @@ enum EpdDrawError epd_draw_image(EpdRect area, const uint8_t *data, const EpdWav .width = 0, .height = 0, }; - return epd_draw_base(area, data, no_crop, EPD_MODE_DEFAULT, temperature, NULL, waveform); + return epd_draw_base(area, data, no_crop, EPD_MODE_DEFAULT, temperature, NULL, NULL, waveform); } void epd_set_rotation(enum EpdRotation rotation) { diff --git a/src/epdiy.h b/src/epdiy.h index d85b655a..8fb94708 100644 --- a/src/epdiy.h +++ b/src/epdiy.h @@ -8,22 +8,22 @@ extern "C" { #endif #pragma once +#include #include #include -#include #include "epd_internals.h" /// An area on the display. typedef struct { - /// Horizontal position. - int x; - /// Vertical position. - int y; - /// Area / image width, must be positive. - int width; - /// Area / image height, must be positive. - int height; + /// Horizontal position. + int x; + /// Vertical position. + int y; + /// Area / image width, must be positive. + int width; + /// Area / image height, must be positive. + int height; } EpdRect; /// Global EPD driver options. @@ -47,67 +47,71 @@ enum EpdInitOptions { /// The image drawing mode. enum EpdDrawMode { - /// An init waveform. - /// This is currently unused, use `epd_clear()` instead. - MODE_INIT = 0x0, - /// Direct Update: Go from any color to black for white only. - MODE_DU = 0x1, - /// Go from any grayscale value to another with a flashing update. - MODE_GC16 = 0x2, - /// Faster version of `MODE_GC16`. - /// Not available with default epdiy waveforms. - MODE_GC16_FAST = 0x3, - /// Animation Mode: Fast, monochrom updates. - /// Not available with default epdiy waveforms. - MODE_A2 = 0x4, - /// Go from any grayscale value to another with a non-flashing update. - MODE_GL16 = 0x5, - /// Faster version of `MODE_GL16`. - /// Not available with default epdiy waveforms. 
- MODE_GL16_FAST = 0x6, - /// A 4-grayscale version of `MODE_DU`. - /// Not available with default epdiy waveforms. - MODE_DU4 = 0x7, - /// Arbitrary transitions for 4 grayscale values. - /// Not available with default epdiy waveforms. - MODE_GL4 = 0xA, - /// Not available with default epdiy waveforms. - MODE_GL16_INV = 0xB, - - /// Go from a white screen to arbitrary grayscale, quickly. - /// Exclusively available with epdiy waveforms. - MODE_EPDIY_WHITE_TO_GL16 = 0x10, - /// Go from a black screen to arbitrary grayscale, quickly. - /// Exclusively available with epdiy waveforms. - MODE_EPDIY_BLACK_TO_GL16 = 0x11, - - /// Monochrome mode. Only supported with 1bpp buffers. - MODE_EPDIY_MONOCHROME = 0x20, - - MODE_UNKNOWN_WAVEFORM = 0x3F, - - // Framebuffer packing modes - /// 1 bit-per-pixel framebuffer with 0 = black, 1 = white. - /// MSB is left is the leftmost pixel, LSB the rightmost pixel. - MODE_PACKING_8PPB = 0x40, - /// 4 bit-per pixel framebuffer with 0x0 = black, 0xF = white. - /// The upper nibble corresponds to the left pixel. - /// A byte cannot wrap over multiple rows, images of uneven width - /// must add a padding nibble per line. - MODE_PACKING_2PPB = 0x80, - /// A difference image with one pixel per byte. - /// The upper nibble marks the "from" color, - /// the lower nibble the "to" color. - MODE_PACKING_1PPB_DIFFERENCE = 0x100, - // reserver for 4PPB mode - - /// Assert that the display has a uniform color, e.g. after initialization. - /// If `MODE_PACKING_2PPB` is specified, a optimized output calculation can be used. - /// Draw on a white background - PREVIOUSLY_WHITE = 0x200, - /// See `PREVIOUSLY_WHITE`. - /// Draw on a black background - PREVIOUSLY_BLACK = 0x400, + /// An init waveform. + /// This is currently unused, use `epd_clear()` instead. + MODE_INIT = 0x0, + /// Direct Update: Go from any color to black for white only. + MODE_DU = 0x1, + /// Go from any grayscale value to another with a flashing update. 
+ MODE_GC16 = 0x2, + /// Faster version of `MODE_GC16`. + /// Not available with default epdiy waveforms. + MODE_GC16_FAST = 0x3, + /// Animation Mode: Fast, monochrome updates. + /// Not available with default epdiy waveforms. + MODE_A2 = 0x4, + /// Go from any grayscale value to another with a non-flashing update. + MODE_GL16 = 0x5, + /// Faster version of `MODE_GL16`. + /// Not available with default epdiy waveforms. + MODE_GL16_FAST = 0x6, + /// A 4-grayscale version of `MODE_DU`. + /// Not available with default epdiy waveforms. + MODE_DU4 = 0x7, + /// Arbitrary transitions for 4 grayscale values. + /// Not available with default epdiy waveforms. + MODE_GL4 = 0xA, + /// Not available with default epdiy waveforms. + MODE_GL16_INV = 0xB, + + /// Go from a white screen to arbitrary grayscale, quickly. + /// Exclusively available with epdiy waveforms. + MODE_EPDIY_WHITE_TO_GL16 = 0x10, + /// Go from a black screen to arbitrary grayscale, quickly. + /// Exclusively available with epdiy waveforms. + MODE_EPDIY_BLACK_TO_GL16 = 0x11, + + /// Monochrome mode. Only supported with 1bpp buffers. + MODE_EPDIY_MONOCHROME = 0x20, + + MODE_UNKNOWN_WAVEFORM = 0x3F, + + // Framebuffer packing modes + /// 1 bit-per-pixel framebuffer with 0 = black, 1 = white. + /// MSB is the leftmost pixel, LSB the rightmost pixel. + MODE_PACKING_8PPB = 0x40, + /// 4 bit-per pixel framebuffer with 0x0 = black, 0xF = white. + /// The upper nibble corresponds to the left pixel. + /// A byte cannot wrap over multiple rows, images of uneven width + /// must add a padding nibble per line. + MODE_PACKING_2PPB = 0x80, + /// A difference image with one pixel per byte. + /// The upper nibble marks the "from" color, + /// the lower nibble the "to" color. + MODE_PACKING_1PPB_DIFFERENCE = 0x100, + // reserved for 4PPB mode + + /// Assert that the display has a uniform color, e.g. after initialization. + /// If `MODE_PACKING_2PPB` is specified, an optimized output calculation can be used. 
+ /// Draw on a white background + PREVIOUSLY_WHITE = 0x200, + /// See `PREVIOUSLY_WHITE`. + /// Draw on a black background + PREVIOUSLY_BLACK = 0x400, + + /// Enforce NOT using S3 Vector extensions. + /// Used for testing. + MODE_FORCE_NO_PIE = 0x800, }; /** Display software rotation. @@ -124,41 +128,41 @@ enum EpdRotation { /// Possible failures when drawing. enum EpdDrawError { - EPD_DRAW_SUCCESS = 0x0, - /// No valid framebuffer packing mode was specified. - EPD_DRAW_INVALID_PACKING_MODE = 0x1, + EPD_DRAW_SUCCESS = 0x0, + /// No valid framebuffer packing mode was specified. + EPD_DRAW_INVALID_PACKING_MODE = 0x1, - /// No lookup table implementation for this mode / packing. - EPD_DRAW_LOOKUP_NOT_IMPLEMENTED = 0x2, + /// No lookup table implementation for this mode / packing. + EPD_DRAW_LOOKUP_NOT_IMPLEMENTED = 0x2, - /// The string to draw is invalid. - EPD_DRAW_STRING_INVALID = 0x4, + /// The string to draw is invalid. + EPD_DRAW_STRING_INVALID = 0x4, - /// The string was not empty, but no characters where drawable. - EPD_DRAW_NO_DRAWABLE_CHARACTERS = 0x8, + /// The string was not empty, but no characters were drawable. + EPD_DRAW_NO_DRAWABLE_CHARACTERS = 0x8, - /// Allocation failed - EPD_DRAW_FAILED_ALLOC = 0x10, + /// Allocation failed + EPD_DRAW_FAILED_ALLOC = 0x10, - /// A glyph could not be drawn, and not fallback was present. - EPD_DRAW_GLYPH_FALLBACK_FAILED = 0x20, + /// A glyph could not be drawn, and no fallback was present. + EPD_DRAW_GLYPH_FALLBACK_FAILED = 0x20, - /// The specified crop area is invalid. - EPD_DRAW_INVALID_CROP = 0x40, + /// The specified crop area is invalid. + EPD_DRAW_INVALID_CROP = 0x40, - /// No such mode is available with the current waveform. - EPD_DRAW_MODE_NOT_FOUND = 0x80, + /// No such mode is available with the current waveform. + EPD_DRAW_MODE_NOT_FOUND = 0x80, - /// The waveform info file contains no applicable temperature range. 
- EPD_DRAW_NO_PHASES_AVAILABLE = 0x100, + /// The waveform info file contains no applicable temperature range. + EPD_DRAW_NO_PHASES_AVAILABLE = 0x100, - /// An invalid combination of font flags was used. - EPD_DRAW_INVALID_FONT_FLAGS = 0x200, + /// An invalid combination of font flags was used. + EPD_DRAW_INVALID_FONT_FLAGS = 0x200, - /// The waveform lookup could not keep up with the display output. - /// - /// Reduce the display clock speed. - EPD_DRAW_EMPTY_LINE_QUEUE = 0x400, + /// The waveform lookup could not keep up with the display output. + /// + /// Reduce the display clock speed. + EPD_DRAW_EMPTY_LINE_QUEUE = 0x400, }; /// The default draw mode (non-flashy refresh, whith previously white screen). @@ -166,39 +170,43 @@ enum EpdDrawError { /// Font drawing flags enum EpdFontFlags { - /// Draw a background. - /// - /// Take the background into account - /// when calculating the size. - EPD_DRAW_BACKGROUND = 0x1, - - /// Left-Align lines - EPD_DRAW_ALIGN_LEFT = 0x2, - /// Right-align lines - EPD_DRAW_ALIGN_RIGHT = 0x4, - /// Center-align lines - EPD_DRAW_ALIGN_CENTER = 0x8, + /// Draw a background. + /// + /// Take the background into account + /// when calculating the size. + EPD_DRAW_BACKGROUND = 0x1, + + /// Left-Align lines + EPD_DRAW_ALIGN_LEFT = 0x2, + /// Right-align lines + EPD_DRAW_ALIGN_RIGHT = 0x4, + /// Center-align lines + EPD_DRAW_ALIGN_CENTER = 0x8, }; /// Font properties. typedef struct { - /// Foreground color - uint8_t fg_color : 4; - /// Background color - uint8_t bg_color : 4; - /// Use the glyph for this codepoint for missing glyphs. - uint32_t fallback_glyph; - /// Additional flags, reserved for future use - enum EpdFontFlags flags; + /// Foreground color + uint8_t fg_color : 4; + /// Background color + uint8_t bg_color : 4; + /// Use the glyph for this codepoint for missing glyphs. 
+ uint32_t fallback_glyph; + /// Additional flags, reserved for future use + enum EpdFontFlags flags; } EpdFontProperties; #include "epd_board.h" +#include "epd_board_specific.h" #include "epd_display.h" #include "epd_highlevel.h" -#include "epd_board_specific.h" /** Initialize the ePaper display */ -void epd_init(const EpdBoardDefinition* board, const EpdDisplay_t* display, enum EpdInitOptions options); +void epd_init( + const EpdBoardDefinition* board, + const EpdDisplay_t* display, + enum EpdInitOptions options +); /** * Get the configured display. @@ -285,8 +293,7 @@ EpdRect epd_full_screen(); * @param framebuffer: The framebuffer object, * which must be `epd_width() / 2 * epd_height()` large. */ -void epd_copy_to_framebuffer(EpdRect image_area, const uint8_t *image_data, - uint8_t *framebuffer); +void epd_copy_to_framebuffer(EpdRect image_area, const uint8_t* image_data, uint8_t* framebuffer); /** * Draw a pixel a given framebuffer. @@ -296,7 +303,7 @@ void epd_copy_to_framebuffer(EpdRect image_area, const uint8_t *image_data, * @param color: The gray value of the line (see [Colors](#Colors)); * @param framebuffer: The framebuffer to draw to, */ -void epd_draw_pixel(int x, int y, uint8_t color, uint8_t *framebuffer); +void epd_draw_pixel(int x, int y, uint8_t color, uint8_t* framebuffer); /** * Draw a horizontal line to a given framebuffer. @@ -308,8 +315,7 @@ void epd_draw_pixel(int x, int y, uint8_t color, uint8_t *framebuffer); * @param framebuffer: The framebuffer to draw to, * which must be `epd_width() / 2 * epd_height()` bytes large. */ -void epd_draw_hline(int x, int y, int length, uint8_t color, - uint8_t *framebuffer); +void epd_draw_hline(int x, int y, int length, uint8_t color, uint8_t* framebuffer); /** * Draw a horizontal line to a given framebuffer. @@ -321,11 +327,17 @@ void epd_draw_hline(int x, int y, int length, uint8_t color, * @param framebuffer: The framebuffer to draw to, * which must be `epd_width() / 2 * epd_height()` bytes large. 
*/ -void epd_draw_vline(int x, int y, int length, uint8_t color, - uint8_t *framebuffer); +void epd_draw_vline(int x, int y, int length, uint8_t color, uint8_t* framebuffer); -void epd_fill_circle_helper(int x0, int y0, int r, int corners, int delta, - uint8_t color, uint8_t *framebuffer); +void epd_fill_circle_helper( + int x0, + int y0, + int r, + int corners, + int delta, + uint8_t color, + uint8_t* framebuffer +); /** * Draw a circle with given center and radius @@ -336,7 +348,7 @@ void epd_fill_circle_helper(int x0, int y0, int r, int corners, int delta, * @param color: The gray value of the line (see [Colors](#Colors)); * @param framebuffer: The framebuffer to draw to, */ -void epd_draw_circle(int x, int y, int r, uint8_t color, uint8_t *framebuffer); +void epd_draw_circle(int x, int y, int r, uint8_t color, uint8_t* framebuffer); /** * Draw a circle with fill with given center and radius @@ -347,7 +359,7 @@ void epd_draw_circle(int x, int y, int r, uint8_t color, uint8_t *framebuffer); * @param color: The gray value of the line (see [Colors](#Colors)); * @param framebuffer: The framebuffer to draw to, */ -void epd_fill_circle(int x, int y, int r, uint8_t color, uint8_t *framebuffer); +void epd_fill_circle(int x, int y, int r, uint8_t color, uint8_t* framebuffer); /** * Draw a rectanle with no fill color @@ -356,7 +368,7 @@ void epd_fill_circle(int x, int y, int r, uint8_t color, uint8_t *framebuffer); * @param color: The gray value of the line (see [Colors](#Colors)); * @param framebuffer: The framebuffer to draw to, */ -void epd_draw_rect(EpdRect rect, uint8_t color, uint8_t *framebuffer); +void epd_draw_rect(EpdRect rect, uint8_t color, uint8_t* framebuffer); /** * Draw a rectanle with fill color @@ -365,7 +377,7 @@ void epd_draw_rect(EpdRect rect, uint8_t color, uint8_t *framebuffer); * @param color: The gray value of the line (see [Colors](#Colors)); * @param framebuffer: The framebuffer to draw to, */ -void epd_fill_rect(EpdRect rect, uint8_t color, 
uint8_t *framebuffer); +void epd_fill_rect(EpdRect rect, uint8_t color, uint8_t* framebuffer); /** * Draw a line @@ -377,8 +389,7 @@ void epd_fill_rect(EpdRect rect, uint8_t color, uint8_t *framebuffer); * @param color: The gray value of the line (see [Colors](#Colors)); * @param framebuffer: The framebuffer to draw to, */ -void epd_draw_line(int x0, int y0, int x1, int y1, uint8_t color, - uint8_t *framebuffer); +void epd_draw_line(int x0, int y0, int x1, int y1, uint8_t color, uint8_t* framebuffer); /** * Draw a triangle with no fill color @@ -392,8 +403,16 @@ void epd_draw_line(int x0, int y0, int x1, int y1, uint8_t color, * @param color: The gray value of the line (see [Colors](#Colors)); * @param framebuffer: The framebuffer to draw to, */ -void epd_draw_triangle(int x0, int y0, int x1, int y1, int x2, int y2, - uint8_t color, uint8_t *framebuffer); +void epd_draw_triangle( + int x0, + int y0, + int x1, + int y1, + int x2, + int y2, + uint8_t color, + uint8_t* framebuffer +); /** * Draw a triangle with color-fill @@ -407,8 +426,16 @@ void epd_draw_triangle(int x0, int y0, int x1, int y1, int x2, int y2, * @param color: The gray value of the line (see [Colors](#Colors)); * @param framebuffer: The framebuffer to draw to, */ -void epd_fill_triangle(int x0, int y0, int x1, int y1, int x2, int y2, - uint8_t color, uint8_t *framebuffer); +void epd_fill_triangle( + int x0, + int y0, + int x1, + int y1, + int x2, + int y2, + uint8_t color, + uint8_t* framebuffer +); /** * Get the current ambient temperature in °C, if supported by the board. * Requires the display to be powered on. @@ -424,10 +451,17 @@ EpdFontProperties epd_font_properties_default(); * Get the text bounds for string, when drawn at (x, y). * Set font properties to NULL to use the defaults. 
*/ -void epd_get_text_bounds(const EpdFont *font, const char *string, - const int *x, const int *y, - int *x1, int *y1, int *w, int *h, - const EpdFontProperties *props); +void epd_get_text_bounds( + const EpdFont* font, + const char* string, + const int* x, + const int* y, + int* x1, + int* y1, + int* w, + int* h, + const EpdFontProperties* props +); /*! * Returns a rect with the bounds of the text * @param font : the font used to get the character sizes @@ -438,27 +472,42 @@ void epd_get_text_bounds(const EpdFont *font, const char *string, * @returns EpdRect with x and y as per the original and height and width * adjusted to fit the text with the margin added as well. */ -EpdRect epd_get_string_rect (const EpdFont *font, const char *string, - int x, int y, int margin, const EpdFontProperties *properties ); +EpdRect epd_get_string_rect( + const EpdFont* font, + const char* string, + int x, + int y, + int margin, + const EpdFontProperties* properties +); /** * Write text to the EPD. */ -enum EpdDrawError epd_write_string(const EpdFont *font, const char *string, int *cursor_x, - int *cursor_y, uint8_t *framebuffer, - const EpdFontProperties *properties); +enum EpdDrawError epd_write_string( + const EpdFont* font, + const char* string, + int* cursor_x, + int* cursor_y, + uint8_t* framebuffer, + const EpdFontProperties* properties +); /** * Write a (multi-line) string to the EPD. */ -enum EpdDrawError epd_write_default(const EpdFont *font, const char *string, int *cursor_x, - int *cursor_y, uint8_t *framebuffer); +enum EpdDrawError epd_write_default( + const EpdFont* font, + const char* string, + int* cursor_x, + int* cursor_y, + uint8_t* framebuffer +); /** * Get the font glyph for a unicode code point. */ -const EpdGlyph* epd_get_glyph(const EpdFont *font, uint32_t code_point); - +const EpdGlyph* epd_get_glyph(const EpdFont* font, uint32_t code_point); /** * Darken / lighten an area for a given time. 
@@ -488,17 +537,23 @@ void epd_push_pixels(EpdRect area, short time, int color); * @param drawn_lines: If not NULL, an array of at least the height of the * image. Every line where the corresponding value in `lines` is `false` will be * skipped. + * @param drawn_columns: If not NULL, an array of at least the width of the + * image / 2, 16-byte aligned. + * The image will only be updated in pixel columns where the corresponding nibbles are non-zero. * @param waveform: The waveform information to use for drawing. * If you don't have special waveforms, use `EPD_BUILTIN_WAVEFORM`. * @returns `EPD_DRAW_SUCCESS` on sucess, a combination of error flags otherwise. */ -enum EpdDrawError epd_draw_base(EpdRect area, - const uint8_t *data, - EpdRect crop_to, - enum EpdDrawMode mode, - int temperature, - const bool *drawn_lines, - const EpdWaveform *waveform); +enum EpdDrawError epd_draw_base( + EpdRect area, + const uint8_t* data, + EpdRect crop_to, + enum EpdDrawMode mode, + int temperature, + const bool* drawn_lines, + const uint8_t* drawn_columns, + const EpdWaveform* waveform +); /** * Calculate a `MODE_PACKING_1PPB_DIFFERENCE` difference image * from two `MODE_PACKING_2PPB` (4 bit-per-pixel) buffers. @@ -512,10 +567,9 @@ enum EpdDrawError epd_draw_base(EpdRect area, * @param dirty_lines: An array of at least `epd_height()`. * The positions corresponding to lines where `to` and `from` differ * are set to `true`, otherwise to `false`. - * @param previously_white: If not NULL, it is set to `true` - * if the considered crop of the `from`-image is completely white. - * @param previously_black: If not NULL, it is set to `true` - * if the considered crop of the `from`-image is completely black. + * @param col_dirtiness: An array of at least `epd_width() / 2`. + * If a nibble is set to non-zero, the pixel column is marked as changed, aka "dirty." + * The buffer must be 16 byte aligned. * @returns The smallest rectangle containing all changed pixels.
*/ EpdRect epd_difference_image_cropped( @@ -524,8 +578,7 @@ EpdRect epd_difference_image_cropped( EpdRect crop_to, uint8_t* interlaced, bool* dirty_lines, - bool* previously_white, - bool* previously_black + uint8_t* col_dirtiness ); /** @@ -534,7 +587,13 @@ EpdRect epd_difference_image_cropped( * * See `epd_difference_image_cropped() for details.` */ -EpdRect epd_difference_image(const uint8_t* to, const uint8_t* from, uint8_t* interlaced, bool* dirty_lines); +EpdRect epd_difference_image( + const uint8_t* to, + const uint8_t* from, + uint8_t* interlaced, + bool* dirty_lines, + uint8_t* col_dirtiness +); /** * Return the pixel color of a 4 bit image array @@ -542,19 +601,23 @@ EpdRect epd_difference_image(const uint8_t* to, const uint8_t* from, uint8_t* in * fb_width, fb_height dimensions * @returns uint8_t 0-255 representing the color on given coordinates (as in epd_draw_pixel) */ -uint8_t epd_get_pixel(int x, int y, int fb_width, int fb_height, const uint8_t *framebuffer); +uint8_t epd_get_pixel(int x, int y, int fb_width, int fb_height, const uint8_t* framebuffer); /** * Draw an image reading pixel per pixel and being rotation aware (via epd_draw_pixel) */ -void epd_draw_rotated_image(EpdRect image_area, const uint8_t *image_buffer, uint8_t *framebuffer); +void epd_draw_rotated_image(EpdRect image_area, const uint8_t* image_buffer, uint8_t* framebuffer); /** * Draw an image reading pixel per pixel and being rotation aware (via epd_draw_pixel) * With an optional transparent color (color key transparency) */ -void epd_draw_rotated_transparent_image(EpdRect image_area, const uint8_t *image_buffer, uint8_t *framebuffer, uint8_t transparent_color) ; - +void epd_draw_rotated_transparent_image( + EpdRect image_area, + const uint8_t* image_buffer, + uint8_t* framebuffer, + uint8_t transparent_color +); /** * Override the pixel clock when using the LCD driver for display output (Epdiy V7+). 
diff --git a/src/highlevel.c b/src/highlevel.c index a7625666..4bc63efe 100644 --- a/src/highlevel.c +++ b/src/highlevel.c @@ -2,8 +2,6 @@ * High-level API implementation for epdiy. */ -#include "epd_highlevel.h" -#include "epdiy.h" #include #include #include @@ -11,6 +9,9 @@ #include #include +#include "epd_highlevel.h" +#include "epdiy.h" + #ifndef _swap_int #define _swap_int(a, b) \ { \ @@ -34,14 +35,16 @@ EpdiyHighlevelState epd_hl_init(const EpdWaveform* waveform) { ESP_LOGW("EPDiy", "Please enable PSRAM for the ESP32 (menuconfig→ Component config→ ESP32-specific)"); #endif EpdiyHighlevelState state; - state.back_fb = heap_caps_malloc(fb_size, MALLOC_CAP_SPIRAM); + state.back_fb = heap_caps_aligned_alloc(16, fb_size, MALLOC_CAP_SPIRAM); assert(state.back_fb != NULL); - state.front_fb = heap_caps_malloc(fb_size, MALLOC_CAP_SPIRAM); + state.front_fb = heap_caps_aligned_alloc(16, fb_size, MALLOC_CAP_SPIRAM); assert(state.front_fb != NULL); - state.difference_fb = heap_caps_malloc(2 * fb_size, MALLOC_CAP_SPIRAM); + state.difference_fb = heap_caps_aligned_alloc(16, 2 * fb_size, MALLOC_CAP_SPIRAM); assert(state.difference_fb != NULL); state.dirty_lines = malloc(epd_height() * sizeof(bool)); assert(state.dirty_lines != NULL); + state.dirty_columns = heap_caps_aligned_alloc(16, epd_width() / 2, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); + assert(state.dirty_columns != NULL); state.waveform = waveform; memset(state.front_fb, 0xFF, fb_size); @@ -102,8 +105,6 @@ enum EpdDrawError epd_hl_update_area(EpdiyHighlevelState* state, enum EpdDrawMod assert(state != NULL); // Not right to rotate here since this copies part of buffer directly - bool previously_white = false; - bool previously_black = false; // Check rotation FIX EpdRect rotated_area = _inverse_rotated_area(area.x, area.y, area.width, area.height); area.x = rotated_area.x; @@ -111,61 +112,66 @@ enum EpdDrawError epd_hl_update_area(EpdiyHighlevelState* state, enum EpdDrawMod area.width = rotated_area.width; area.height 
= rotated_area.height; - ESP_LOGI("epdiy", "calculating diff.."); - //FIXME: use crop information here, if available + uint32_t ts = esp_timer_get_time() / 1000; + + // FIXME: use crop information here, if available EpdRect diff_area = epd_difference_image_cropped( state->front_fb, state->back_fb, area, state->difference_fb, state->dirty_lines, - &previously_white, - &previously_black + state->dirty_columns ); - - ESP_LOGI("epdiy", "highlevel diff area: x: %d, y: %d, w: %d, h: %d", diff_area.x, diff_area.y, diff_area.width, diff_area.height); - + if (diff_area.height == 0 || diff_area.width == 0) { return EPD_DRAW_SUCCESS; } uint32_t t1 = esp_timer_get_time() / 1000; - previously_white = false; - previously_black = false; - diff_area.x = 0; diff_area.y = 0; diff_area.width = epd_width(); diff_area.height = epd_height(); - enum EpdDrawError err; - if (previously_white) { - err = epd_draw_base(epd_full_screen(), state->front_fb, diff_area, MODE_PACKING_2PPB | PREVIOUSLY_WHITE | mode, temperature, state->dirty_lines, state->waveform); - } else if (previously_black) { - err = epd_draw_base(epd_full_screen(), state->front_fb, diff_area, MODE_PACKING_2PPB | PREVIOUSLY_BLACK | mode, temperature, state->dirty_lines, state->waveform); - } else { - err = epd_draw_base(epd_full_screen(), state->difference_fb, diff_area, MODE_PACKING_1PPB_DIFFERENCE | mode, temperature, state->dirty_lines, state->waveform); - } - + enum EpdDrawError err = EPD_DRAW_SUCCESS; + err = epd_draw_base(epd_full_screen(), state->difference_fb, diff_area, MODE_PACKING_1PPB_DIFFERENCE | mode, temperature, state->dirty_lines, state->dirty_columns, state->waveform); uint32_t t2 = esp_timer_get_time() / 1000; - printf("actual draw took %dms.\n", t2 - t1); + + diff_area.x = 0; + diff_area.y = 0; + diff_area.width = epd_width(); + diff_area.height = epd_height(); + + int buf_width = epd_width(); for (int l=diff_area.y; l < diff_area.y + diff_area.height; l++) { if (state->dirty_lines[l] > 0) { - uint8_t* lfb 
= state->front_fb + epd_width() / 2 * l; - uint8_t* lbb = state->back_fb + epd_width() / 2 * l; - - for (int x=diff_area.x; x < diff_area.x + diff_area.width; x++) { - if (x % 2) { - *(lbb + x / 2) = (*(lfb + x / 2) & 0xF0) | (*(lbb + x / 2) & 0x0F); - } else { - *(lbb + x / 2) = (*(lfb + x / 2) & 0x0F) | (*(lbb + x / 2) & 0xF0); - } + uint8_t* lfb = state->front_fb + buf_width / 2 * l; + uint8_t* lbb = state->back_fb + buf_width / 2 * l; + + int x = diff_area.x; + int x_last = diff_area.x + diff_area.width - 1; + + if (x % 2) { + *(lbb + x / 2) = (*(lfb + x / 2) & 0xF0) | (*(lbb + x / 2) & 0x0F); + x += 1; + } + + if (x_last % 2) { + *(lbb + x_last / 2) = (*(lfb + x_last / 2) & 0x0F) | (*(lbb + x_last / 2) & 0xF0); + x_last -= 1; } + + memcpy(lbb + (x / 2), lfb + (x / 2), (x_last - x) / 2); } } + + uint32_t t3 = esp_timer_get_time() / 1000; + + ESP_LOGI("epdiy", "diff: %dms, draw: %dms, buffer update: %dms, total: %dms", t1 - ts, t2 - t1, t3 - t2, t3 - ts); return err; } diff --git a/src/output_common/line_queue.c b/src/output_common/line_queue.c index 4a86128f..c0a2ae7a 100644 --- a/src/output_common/line_queue.c +++ b/src/output_common/line_queue.c @@ -1,7 +1,55 @@ #include #include +#include +#include +#include #include "line_queue.h" +#include "render_method.h" + +static inline int ceil_div(int x, int y) { return x / y + (x % y != 0); } + +/// Initialize the line queue and allocate memory. 
+LineQueue_t lq_init(int queue_len, int element_size, bool use_mask) { + LineQueue_t queue; + queue.element_size = element_size; + queue.size = queue_len; + queue.current = 0; + queue.last = 0; + + int elem_buf_size = ceil_div(element_size, 16) * 16; + + queue.bufs = calloc(queue.size, elem_buf_size); + assert(queue.bufs != NULL); + + for (int i=0; isize; i++) { + heap_caps_free(queue->bufs[i]); + } + + if (queue->mask_buffer != NULL) { + heap_caps_free(queue->mask_buffer); + } + free(queue->bufs); +} uint8_t* IRAM_ATTR lq_current(LineQueue_t* queue) { int current = atomic_load_explicit(&queue->current, memory_order_acquire); @@ -10,17 +58,27 @@ uint8_t* IRAM_ATTR lq_current(LineQueue_t* queue) { if ((current + 1) % queue->size == last) { return NULL; } - return &queue->buf[current * queue->element_size]; + return queue->bufs[current]; } void IRAM_ATTR lq_commit(LineQueue_t* queue) { int current = atomic_load_explicit(&queue->current, memory_order_acquire); + if (current == queue->size - 1) { queue->current = 0; } else { atomic_fetch_add(&queue->current, 1); } + +#ifdef RENDER_METHOD_LCD + void epd_apply_line_mask_VE(uint8_t *line, const uint8_t *mask, int mask_len); + epd_apply_line_mask_VE(queue->bufs[current], queue->mask_buffer, queue->mask_buffer_len); +#else + for (int i=0; i < queue->mask_buffer_len / 4; i++) { + ((uint32_t*)(queue->bufs[current]))[i] &= ((uint32_t*)(queue->mask_buffer))[i]; + } +#endif } int IRAM_ATTR lq_read(LineQueue_t* queue, uint8_t* dst) { @@ -31,7 +89,7 @@ int IRAM_ATTR lq_read(LineQueue_t* queue, uint8_t* dst) { return -1; } - memcpy(dst, &queue->buf[last * queue->element_size], queue->element_size); + memcpy(dst, queue->bufs[last], queue->element_size); if (last == queue->size - 1) { queue->last = 0; diff --git a/src/output_common/line_queue.h b/src/output_common/line_queue.h index 6b342c77..b9c3cc5e 100644 --- a/src/output_common/line_queue.h +++ b/src/output_common/line_queue.h @@ -2,18 +2,32 @@ #include #include +#include 
#include -/// Circular line queue with atomic read / write operations. +/// Circular line queue with atomic read / write operations +/// and accelerated masking on the output buffer. typedef struct { int size; atomic_int current; atomic_int last; - uint8_t* buf; + uint8_t** bufs; + // size of an element size_t element_size; + //size of the mask buffer + size_t mask_buffer_len; + // mask to apply to the output buffer, NULL if none. + // must be elem_buf_size long. + uint8_t* mask_buffer; } LineQueue_t; +/// Initialize the line queue and allocate memory. +LineQueue_t lq_init(int queue_len, int element_size, bool use_mask); + +/// Deinitialize the line queue and free memory. +void lq_free(LineQueue_t* queue); + /// Pointer to the next empty element in the line queue. /// /// NULL if the queue is currently full. diff --git a/src/output_common/lut.S b/src/output_common/lut.S new file mode 100644 index 00000000..fd6af856 --- /dev/null +++ b/src/output_common/lut.S @@ -0,0 +1,145 @@ +#include +#include +#include "render_method.h" + +#ifdef RENDER_METHOD_LCD + +.text +.align 4 +.global calc_epd_input_1ppB_1k_S3_VE_aligned +.type calc_epd_input_1ppB_1k_S3_VE_aligned,@function + +// // CRASH AND BURN for debugging +// EE.MOVI.32.A q3, a2, 0 +// EE.MOVI.32.A q3, a3, 1 +// EE.MOVI.32.A q3, a4, 2 +// EE.MOVI.32.A q3, a5, 3 +// l8ui a10, a10, 0 + +// void calc_epd_input_1ppB_1k_S3_VE_aligned( +// const uint32_t *ld, +// uint8_t *epd_input, +// const uint8_t *conversion_lut, +// uint32_t epd_width +//); +calc_epd_input_1ppB_1k_S3_VE_aligned: +// input - a2 +// output - a3 +// lut - a4 +// len - a5 + + entry a1, 32 + + // divide by 16 and do one loop less, + // because the last loop is special + srli a5, a5, 4 + addi.n a5, a5, -1 + + + // bitmasks for bit shift by multiplication + movi.n a10, 0x40001000 + EE.MOVI.32.Q q4,a10,0 + movi.n a10, 0x04000100 + EE.MOVI.32.Q q4,a10,1 + movi.n a10, 0x00400010 + EE.MOVI.32.Q q4,a10,2 + movi a10, 0x00040001 + EE.MOVI.32.Q q4,a10,3 + +
EE.ZERO.Q q0 + + EE.VLD.128.IP q1, a2, 16 + + // Instructions sometimes are in an unexpected order + // for best pipeline utilization + loopnez a5, .loop_end_lut_lookup + + // q1, q0 contain the input bytes, zero-extended to bits bytes + EE.VZIP.8 q1, q0 + + // load 32-bit LUT results + EE.LDXQ.32 q2, q0, a4, 0, 6 + EE.LDXQ.32 q2, q0, a4, 1, 7 + EE.LDXQ.32 q2, q0, a4, 2, 4 + EE.LDXQ.32 q2, q0, a4, 3, 5 + EE.LDXQ.32 q3, q0, a4, 0, 2 + EE.LDXQ.32 q3, q0, a4, 1, 3 + EE.LDXQ.32 q3, q0, a4, 2, 0 + EE.LDXQ.32 q3, q0, a4, 3, 1 + + EE.ZERO.ACCX + + // zip to have 16bit LUT results in q2, q3 zeroes + EE.VUNZIP.16 q2, q3 + + // combine results with using multiply-add as shift-or + EE.VMULAS.U16.ACCX q2,q4 + + // load 32-bit LUT results + EE.LDXQ.32 q2, q1, a4, 0, 6 + EE.LDXQ.32 q2, q1, a4, 1, 7 + EE.LDXQ.32 q2, q1, a4, 2, 4 + EE.LDXQ.32 q2, q1, a4, 3, 5 + EE.LDXQ.32 q0, q1, a4, 0, 2 + EE.LDXQ.32 q0, q1, a4, 1, 3 + EE.LDXQ.32 q0, q1, a4, 2, 0 + EE.LDXQ.32 q0, q1, a4, 3, 1 + + // store multiplication result in a6 + RUR.ACCX_0 a6 + s16i a6, a3, 2 + + EE.ZERO.ACCX + + // zip to have 16bit LUT results in q2, q0 zeroes + EE.VUNZIP.16 q2, q0 + + // Combine second set of results and load the next data + EE.VMULAS.U16.ACCX.LD.IP q1, a2, 16, q2, q4 + + // store result in a6 + RUR.ACCX_0 a6 + s16i a6, a3, 0 + + addi.n a3, a3, 4 +.loop_end_lut_lookup: + + // Same as above, but in the last iteration + // we do not load to not access out of bounds. 
+ EE.VZIP.8 q1, q0 + + EE.LDXQ.32 q2, q0, a4, 0, 6 + EE.LDXQ.32 q2, q0, a4, 1, 7 + EE.LDXQ.32 q2, q0, a4, 2, 4 + EE.LDXQ.32 q2, q0, a4, 3, 5 + EE.LDXQ.32 q3, q0, a4, 0, 2 + EE.LDXQ.32 q3, q0, a4, 1, 3 + EE.LDXQ.32 q3, q0, a4, 2, 0 + EE.LDXQ.32 q3, q0, a4, 3, 1 + + EE.ZERO.ACCX + EE.VUNZIP.16 q2, q3 + EE.VMULAS.U16.ACCX q2,q4 + + EE.LDXQ.32 q2, q1, a4, 0, 6 + EE.LDXQ.32 q2, q1, a4, 1, 7 + EE.LDXQ.32 q2, q1, a4, 2, 4 + EE.LDXQ.32 q2, q1, a4, 3, 5 + EE.LDXQ.32 q0, q1, a4, 0, 2 + EE.LDXQ.32 q0, q1, a4, 1, 3 + EE.LDXQ.32 q0, q1, a4, 2, 0 + EE.LDXQ.32 q0, q1, a4, 3, 1 + + RUR.ACCX_0 a6 + s16i a6, a3, 2 + EE.ZERO.ACCX + + EE.VUNZIP.16 q2, q0 + EE.VMULAS.U16.ACCX q2, q4 + RUR.ACCX_0 a6 + s16i a6, a3, 0 + + movi.n a2, 0 // return status ESP_OK + retw.n + +#endif \ No newline at end of file diff --git a/src/output_common/lut.c b/src/output_common/lut.c index 722b7f21..8e163abb 100644 --- a/src/output_common/lut.c +++ b/src/output_common/lut.c @@ -1,8 +1,10 @@ #include "lut.h" +#include "epdiy.h" #include "render_method.h" #include "render_context.h" +#include #include #include "esp_system.h" // for ESP_IDF_VERSION_VAL @@ -151,49 +153,56 @@ void IRAM_ATTR calc_epd_input_4bpp_lut_64k( } /** - * Look up 4 pixels of a differential image. + * Look up 4 pixels of a differential image in a LUT constructed for use with vector extensions. 
*/ __attribute__((optimize("O3"))) -static inline uint8_t lookup_differential_pixels(const uint32_t in, const uint8_t *conversion_lut) { - uint8_t out = conversion_lut[(in >> 24) & 0xFF]; - out |= (conversion_lut + 0x100)[(in >> 16) & 0xFF]; - out |= (conversion_lut + 0x200)[(in >> 8) & 0xFF]; - out |= (conversion_lut + 0x300)[in & 0xFF]; +static inline uint8_t lookup_pixels_in_VE_LUT(const uint32_t in, const uint8_t *conversion_lut) { + uint32_t* padded_lut = (uint32_t*)conversion_lut; + uint8_t out = padded_lut[(in >> 24) & 0xFF] << 6; + out |= padded_lut[(in >> 16) & 0xFF] << 4; + out |= padded_lut[(in >> 8) & 0xFF] << 2; + out |= padded_lut[(in >> 0 )& 0xFF]; return out; } + /** - * Calculate EPD input for a difference image with one pixel per byte. - */ -__attribute__((optimize("O3"))) -void IRAM_ATTR calc_epd_input_1ppB( - const uint32_t *ld, - uint8_t *epd_input, - const uint8_t *conversion_lut, - uint32_t epd_width -) { +* Lookup accelerated by the S3 Vector Extensions. +* Expects aligned buffers and a length that is divisible by 16. +*/ +void IRAM_ATTR calc_epd_input_1ppB_1k_S3_VE_aligned(const uint32_t *ld, uint8_t *epd_input, const uint8_t *conversion_lut, uint32_t epd_width); - // this is reversed for little-endian, but this is later compensated - // through the output peripheral. 
- for (uint32_t j = 0; j < epd_width / 4; j += 4) { -#ifdef RENDER_METHOD_LCD - epd_input[j + 0] = lookup_differential_pixels(*(ld++), conversion_lut); - epd_input[j + 1] = lookup_differential_pixels(*(ld++), conversion_lut); - epd_input[j + 2] = lookup_differential_pixels(*(ld++), conversion_lut); - epd_input[j + 3] = lookup_differential_pixels(*(ld++), conversion_lut); -#elif RENDER_METHOD_I2S - epd_input[j + 2] = lookup_differential_pixels(*(ld++), conversion_lut); - epd_input[j + 3] = lookup_differential_pixels(*(ld++), conversion_lut); - epd_input[j + 0] = lookup_differential_pixels(*(ld++), conversion_lut); - epd_input[j + 1] = lookup_differential_pixels(*(ld++), conversion_lut); -#endif - } + +/** +* Lookup accelerated by the S3 Vector Extensions. +* Uses a 1K padded LUT (each entry takes up 32 bits) +*/ +void IRAM_ATTR calc_epd_input_1ppB_1k_S3_VE(const uint32_t *ld, uint8_t *epd_input, const uint8_t *conversion_lut, uint32_t epd_width) { + // alignment boundaries in pixels + int unaligned_len_front = (16 - (uint32_t)ld % 16) % 16; + int unaligned_len_back = ((uint32_t)ld + epd_width) % 16; + int aligned_len = epd_width - unaligned_len_front - unaligned_len_back; + + if (unaligned_len_front) { + for (int i=0; i< unaligned_len_front / 4; i++) { + (*epd_input++) = lookup_pixels_in_VE_LUT((*ld++), conversion_lut); + } + } + calc_epd_input_1ppB_1k_S3_VE_aligned(ld, epd_input, conversion_lut, aligned_len); + + ld += aligned_len / 4; + epd_input += aligned_len / 4; + + if (unaligned_len_back) { + for (int i=0; i< unaligned_len_back / 4; i++) { + (*epd_input++) = lookup_pixels_in_VE_LUT((*ld++), conversion_lut); + } + } } /** * Calculate EPD input for a difference image with one pixel per byte. */ - __attribute__((optimize("O3"))) void IRAM_ATTR calc_epd_input_1ppB_64k( const uint32_t *ld, @@ -223,7 +232,6 @@ void IRAM_ATTR calc_epd_input_1ppB_64k( #endif } - /** * Look up 4 pixels in a 1K LUT with fixed "from" value. 
*/ @@ -373,6 +381,29 @@ static void IRAM_ATTR waveform_lut_64k( } } +/** + * A 32bit aligned lookup table for lookup using the ESP32-S3 vector extensions. + */ +__attribute__((optimize("O3"))) +static void IRAM_ATTR waveform_lut_S3_VE( + uint8_t *lut, + const EpdWaveformPhases *phases, + int frame +) { + uint32_t* lut32 = (uint32_t*) lut; + const uint8_t *p_lut = phases->luts + (16 * 4 * frame); + for (uint8_t to = 0; to < 16; to++) { + for (uint8_t from_packed = 0; from_packed < 4; from_packed++) { + uint8_t index = (to << 4) | (from_packed * 4); + uint8_t packed = *(p_lut++); + lut32[index] = (packed >> 6) & 3; + lut32[index + 1] = (packed >> 4) & 3; + lut32[index + 2] = (packed >> 2) & 3; + lut32[index + 3] = (packed >> 0) & 3; + } + } +} + /** * Build a 16-bit LUT from the waveform if the previous color is * known, e.g. all white or all black. @@ -428,6 +459,12 @@ static void IRAM_ATTR waveform_lut_static_from( */ __attribute__((optimize("O3"))) void mask_line_buffer(uint8_t* lb, int line_buf_len, int xmin, int xmax) { +#ifdef RENDER_METHOD_I2S + const int offset_table[4] = {2, 3, 0, 1}; +#else + const int offset_table[4] = {0, 1, 2, 3}; +#endif + // lower bound to where byte order is not an issue. 
int memset_start = (xmin / 16) * 4; int memset_end = min(((xmax + 15) / 16) * 4, line_buf_len); @@ -436,7 +473,6 @@ void mask_line_buffer(uint8_t* lb, int line_buf_len, int xmin, int xmax) { memset(lb, 0, memset_start); memset(lb + memset_end, 0, line_buf_len - memset_end); - const int offset_table[4] = {2, 3, 0, 1}; // mask unused pixels at the start of the output interval uint8_t line_start_mask = 0xFF << (2 * (xmin % 4)); @@ -479,6 +515,13 @@ enum EpdDrawError IRAM_ATTR calculate_lut( enum EpdDrawMode selected_mode = mode & 0x3F; +#ifdef RENDER_METHOD_LCD + if ((mode & MODE_PACKING_1PPB_DIFFERENCE) && !(mode & MODE_FORCE_NO_PIE)) { + waveform_lut_S3_VE(lut, phases, frame); + return EPD_DRAW_SUCCESS; + } +#endif + // two pixel per byte packing with only target color if (lut_size == (1 << 16)) { if (mode & MODE_PACKING_2PPB) { diff --git a/src/output_common/lut.h b/src/output_common/lut.h index ab73958d..9dcb30b3 100644 --- a/src/output_common/lut.h +++ b/src/output_common/lut.h @@ -61,6 +61,7 @@ void nibble_shift_buffer_right(uint8_t *buf, uint32_t len); void calc_epd_input_1ppB(const uint32_t *ld, uint8_t *epd_input, const uint8_t *conversion_lut, uint32_t epd_width); void calc_epd_input_1ppB_64k(const uint32_t *ld, uint8_t *epd_input, const uint8_t *conversion_lut, uint32_t epd_width); +void calc_epd_input_1ppB_1k_S3_VE(const uint32_t *ld, uint8_t *epd_input, const uint8_t *conversion_lut, uint32_t epd_width); uint8_t lookup_pixels_4bpp_1k(uint16_t in, const uint8_t *conversion_lut, uint8_t from, uint32_t epd_width); void calc_epd_input_1bpp(const uint32_t *line_data, uint8_t *epd_input, const uint8_t *lut, uint32_t epd_width); diff --git a/src/output_common/render_context.c b/src/output_common/render_context.c index a77a7f9e..dde34115 100644 --- a/src/output_common/render_context.c +++ b/src/output_common/render_context.c @@ -1,7 +1,10 @@ #include "render_context.h" +#include "esp_log.h" + #include "../epdiy.h" #include "lut.h" +#include "render_method.h" /// 
For waveforms without timing and the I2S diving method, /// the default hold time for each line is 12us @@ -26,11 +29,14 @@ lut_func_t get_lut_function(RenderContext_t* ctx) { ctx->error |= EPD_DRAW_LOOKUP_NOT_IMPLEMENTED; } } else if (mode & MODE_PACKING_1PPB_DIFFERENCE) { - if (ctx->conversion_lut_size == 1024) { - return &calc_epd_input_1ppB; - } else { +#ifdef RENDER_METHOD_LCD + return &calc_epd_input_1ppB_1k_S3_VE; +#endif + + if (ctx->conversion_lut_size == (1 << 16)) { return &calc_epd_input_1ppB_64k; } + return NULL; } else if (mode & MODE_PACKING_8PPB) { return &calc_epd_input_1bpp; } else { diff --git a/src/output_common/render_method.h b/src/output_common/render_method.h index 69e158bf..01bff60b 100644 --- a/src/output_common/render_method.h +++ b/src/output_common/render_method.h @@ -7,3 +7,10 @@ #else #error "unknown chip, cannot choose render method!" #endif + +#ifdef __clang__ +#define IRAM_ATTR +// define this if we're using clangd to make it accept the GCC builtin +void __assert_func (const char* file, int line, const char* func, + const char* failedexpr); +#endif \ No newline at end of file diff --git a/src/output_i2s/i2s_data_bus.c b/src/output_i2s/i2s_data_bus.c index a135a6ac..8e9ea21a 100644 --- a/src/output_i2s/i2s_data_bus.c +++ b/src/output_i2s/i2s_data_bus.c @@ -1,5 +1,6 @@ #include "i2s_data_bus.h" +#include "esp_intr_alloc.h" #include "sdkconfig.h" // the I2S driver is based on ESP32 registers and won't compile on the S3 @@ -299,6 +300,7 @@ void i2s_bus_init(i2s_bus_config *cfg, uint32_t epd_row_width) { } void i2s_bus_deinit() { + esp_intr_disable(gI2S_intr_handle); esp_intr_free(gI2S_intr_handle); free(i2s_state.buf_a); diff --git a/src/output_i2s/render_i2s.c b/src/output_i2s/render_i2s.c index f4a6629e..0258f33f 100644 --- a/src/output_i2s/render_i2s.c +++ b/src/output_i2s/render_i2s.c @@ -382,10 +382,6 @@ void IRAM_ATTR i2s_fetch_frame_data(RenderContext_t *ctx, int thread_id) { memcpy(buf, lp, lq->element_size); - if (line_start_x 
> 0 || line_end_x < ctx->display_width) { - //mask_line_buffer(line_buf, line_bytes, line_start_x, line_end_x); - } - lq_commit(lq); if (shifted) { @@ -395,6 +391,7 @@ void IRAM_ATTR i2s_fetch_frame_data(RenderContext_t *ctx, int thread_id) { } void i2s_deinit() { + rmt_pulse_deinit(); i2s_bus_deinit(); } diff --git a/src/output_i2s/rmt_pulse.c b/src/output_i2s/rmt_pulse.c index 92a5d40b..dfde274c 100644 --- a/src/output_i2s/rmt_pulse.c +++ b/src/output_i2s/rmt_pulse.c @@ -1,4 +1,5 @@ #include "../output_common/render_method.h" +#include "esp_intr_alloc.h" #ifdef RENDER_METHOD_I2S @@ -60,6 +61,12 @@ void rmt_pulse_init(gpio_num_t pin) { rmt_set_tx_intr_en(row_rmt_config.channel, true); } + +void rmt_pulse_deinit() { + esp_intr_disable(gRMT_intr_handle); + esp_intr_free(gRMT_intr_handle); +} + void IRAM_ATTR pulse_ckv_ticks(uint16_t high_time_ticks, uint16_t low_time_ticks, bool wait) { while (!rmt_tx_done) { diff --git a/src/output_i2s/rmt_pulse.h b/src/output_i2s/rmt_pulse.h index 300159ed..8fda6b7a 100644 --- a/src/output_i2s/rmt_pulse.h +++ b/src/output_i2s/rmt_pulse.h @@ -13,6 +13,12 @@ */ void rmt_pulse_init(gpio_num_t pin); + +/** +* Resets the pin and RMT peripheral, frees associated resources. +*/ +void rmt_pulse_deinit(); + /** * Outputs a single pulse (high -> low) on the configured pin. * This function will always wait for a previous call to finish. 
@@ -38,3 +44,4 @@ bool rmt_busy(); */ void pulse_ckv_ticks(uint16_t high_time_us, uint16_t low_time_us, bool wait); + diff --git a/src/output_lcd/lcd_driver.c b/src/output_lcd/lcd_driver.c index 6523a204..91524b62 100644 --- a/src/output_lcd/lcd_driver.c +++ b/src/output_lcd/lcd_driver.c @@ -2,74 +2,81 @@ #include "epdiy.h" #include "../output_common/render_method.h" +#include "esp_heap_caps.h" +#include "esp_intr_alloc.h" +#include "hal/gpio_types.h" #ifdef RENDER_METHOD_LCD #include +#include +#include +#include +#include #include #include #include #include -#include -#include -#include -#include #if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 0, 0) +#include #include #include -#include -#include #include +#include +#include #else -#include #include #include #include +#include #include "idf-4-backports.h" #endif -#include -#include -#include -#include -#include -#include #include -#include #include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include #include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include #include - -#include "../output_common/lut.h" +#include +#include #define TAG "epdiy" -static inline int min(int x, int y) { return x < y ? x : y; } -static inline int max(int x, int y) { return x > y ? x : y; } +static inline int min(int x, int y) { + return x < y ? x : y; +} +static inline int max(int x, int y) { + return x > y ? x : y; +} -#define S3_LCD_PIN_NUM_BK_LIGHT -1 -//#define S3_LCD_PIN_NUM_MODE 4 +#define S3_LCD_PIN_NUM_BK_LIGHT -1 +// #define S3_LCD_PIN_NUM_MODE 4 -#define LINE_BATCH 1000 -#define BOUNCE_BUF_LINES 4 +#define LINE_BATCH 1000 +#define BOUNCE_BUF_LINES 4 -#define RMT_CKV_CHAN RMT_CHANNEL_1 +#define RMT_CKV_CHAN RMT_CHANNEL_1 #if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 0, 0) -// The extern line is declared in esp-idf/components/driver/deprecated/rmt_legacy.c. 
It has access to RMTMEM through the rmt_private.h header -// which we can't access outside the sdk. Declare our own extern here to properly use the RMTMEM smybol defined in components/soc/[target]/ld/[target].peripherals.ld -// Also typedef the new rmt_mem_t struct to the old rmt_block_mem_t struct. Same data fields, different names -typedef rmt_mem_t rmt_block_mem_t ; +// The extern line is declared in esp-idf/components/driver/deprecated/rmt_legacy.c. It has access +// to RMTMEM through the rmt_private.h header which we can't access outside the sdk. Declare our own +// extern here to properly use the RMTMEM symbol defined in +// components/soc/[target]/ld/[target].peripherals.ld. Also typedef the new rmt_mem_t struct to the +// old rmt_block_mem_t struct. Same data fields, different names +typedef rmt_mem_t rmt_block_mem_t; extern rmt_block_mem_t RMTMEM; #endif @@ -92,7 +99,7 @@ typedef struct { LcdEpdConfig_t config; - uint8_t *bounce_buffer[2]; + uint8_t* bounce_buffer[2]; // size of a single bounce buffer size_t bb_size; size_t batches; @@ -103,13 +110,21 @@ typedef struct { gdma_channel_handle_t dma_chan; // DMA descriptors pool dma_descriptor_t* dma_nodes; -} s3_lcd_t; -static s3_lcd_t lcd; + /// The number of bytes in a horizontal display register line. + int line_bytes; -/// The number of bytes in a horizontal display register line. -static int line_bytes = 0; -static int vertical_lines = 0; + // With 8 bit bus width, we need a dummy cycle before the actual data, + // because the LCD peripheral behaves weirdly.
+ // Also see: + // https://blog.adafruit.com/2022/06/14/esp32uesday-hacking-the-esp32-s3-lcd-peripheral/ + int dummy_bytes; + + /// The number of lines of the display + int display_lines; +} s3_lcd_t; + +static s3_lcd_t lcd = {0}; void IRAM_ATTR epd_lcd_line_source_cb(line_cb_func_t line_source, void* payload) { lcd.line_source_cb = line_source; @@ -124,23 +139,21 @@ void IRAM_ATTR epd_lcd_frame_done_cb(frame_done_func_t cb, void* payload) { static IRAM_ATTR bool fill_bounce_buffer(uint8_t* buffer) { bool task_awoken = false; - // a dummy byte is neeed in 8 bit mode to work around LCD peculiarities - int dummy_bytes = lcd.bb_size / BOUNCE_BUF_LINES - line_bytes; - - for (int i=0; i < BOUNCE_BUF_LINES; i++) { - if (lcd.line_source_cb != NULL) { - // this is strange, with 16 bit need a dummy cycle. But still, the first byte in the FIFO is correct. - // So we only need a true dummy byte in the FIFO in the 8 bit configuration. - task_awoken |= lcd.line_source_cb(lcd.line_cb_payload, &buffer[i * (line_bytes + dummy_bytes) + (dummy_bytes % 2)]); + for (int i = 0; i < BOUNCE_BUF_LINES; i++) { + if (lcd.line_source_cb != NULL) { + // this is strange, with 16 bit need a dummy cycle. But still, the first byte in the + // FIFO is correct. So we only need a true dummy byte in the FIFO in the 8 bit + // configuration. 
+ int buffer_offset = i * (lcd.line_bytes + lcd.dummy_bytes) + (lcd.dummy_bytes % 2); + task_awoken |= lcd.line_source_cb(lcd.line_cb_payload, &buffer[buffer_offset]); } else { - memset(&buffer[i * line_bytes], 0x00, line_bytes); + memset(&buffer[i * lcd.line_bytes], 0x00, lcd.line_bytes); } } return task_awoken; } static void start_ckv_cycles(int cycles) { - rmt_ll_tx_enable_loop_count(&RMT, RMT_CKV_CHAN, true); rmt_ll_tx_enable_loop_autostop(&RMT, RMT_CKV_CHAN, true); rmt_ll_tx_set_loop_count(&RMT, RMT_CKV_CHAN, cycles); @@ -148,130 +161,84 @@ static void start_ckv_cycles(int cycles) { rmt_ll_tx_start(&RMT, RMT_CKV_CHAN); } -void IRAM_ATTR epd_lcd_start_frame() { - int initial_lines = min(LINE_BATCH, vertical_lines); - - int dummy_bytes = lcd.bb_size / BOUNCE_BUF_LINES - line_bytes; - - // hsync: pulse with, back porch, active width, front porch - int end_line = lcd.line_cycles - lcd.lcd_res_h - lcd.config.le_high_time - lcd.config.line_front_porch; - lcd_ll_set_horizontal_timing(lcd.hal.dev, - lcd.config.le_high_time - (dummy_bytes > 0), - lcd.config.line_front_porch, - // a dummy byte is neeed in 8 bit mode to work around LCD peculiarities - lcd.lcd_res_h + (dummy_bytes > 0), - end_line - ); - lcd_ll_set_vertical_timing(lcd.hal.dev, 1, 1, initial_lines, 1); - - // generate the hsync at the very beginning of line - lcd_ll_set_hsync_position(lcd.hal.dev, 1); - - //gpio_set_level(S3_LCD_PIN_NUM_MODE, 1); - - // reset FIFO of DMA and LCD, incase there remains old frame data - gdma_reset(lcd.dma_chan); - lcd_ll_stop(lcd.hal.dev); - lcd_ll_fifo_reset(lcd.hal.dev); - lcd_ll_enable_auto_next_frame(lcd.hal.dev, true); - - lcd.batches = 0; - fill_bounce_buffer(lcd.bounce_buffer[0]); - fill_bounce_buffer(lcd.bounce_buffer[1]); - - - // the start of DMA should be prior to the start of LCD engine - gdma_start(lcd.dma_chan, (intptr_t)&lcd.dma_nodes[0]); - - // enter a critical section to ensure the frame start timing is correct - taskENTER_CRITICAL(&frame_start_spinlock); - 
- // delay 1us is sufficient for DMA to pass data to LCD FIFO - // in fact, this is only needed when LCD pixel clock is set too high - gpio_set_level(lcd.config.bus.stv, 0); - //esp_rom_delay_us(1); - // for picture clarity, it seems to be important to start CKV at a "good" - // time, seemingly start or towards end of line. - start_ckv_cycles(initial_lines + 5); - esp_rom_delay_us(lcd.line_length_us); - gpio_set_level(lcd.config.bus.stv, 1); - esp_rom_delay_us(lcd.line_length_us); - esp_rom_delay_us(lcd.config.ckv_high_time / 10); - - // start LCD engine - lcd_ll_start(lcd.hal.dev); - - taskEXIT_CRITICAL(&frame_start_spinlock); -} - /** * Build the RMT signal according to the timing set in the lcd object. */ static void ckv_rmt_build_signal() { - int low_time = (lcd.line_length_us * 10 - lcd.config.ckv_high_time); - volatile rmt_item32_t *rmt_mem_ptr = - &(RMTMEM.chan[RMT_CKV_CHAN].data32[0]); + int low_time = (lcd.line_length_us * 10 - lcd.config.ckv_high_time); + volatile rmt_item32_t* rmt_mem_ptr = &(RMTMEM.chan[RMT_CKV_CHAN].data32[0]); rmt_mem_ptr->duration0 = lcd.config.ckv_high_time; rmt_mem_ptr->level0 = 1; rmt_mem_ptr->duration1 = low_time; rmt_mem_ptr->level1 = 0; - //rmt_mem_ptr[1] = rmt_mem_ptr[0]; - rmt_mem_ptr[1].val = 0; + rmt_mem_ptr[1].val = 0; } +/** + * Configure the RMT peripheral for use as the CKV clock. 
+ */ static void init_ckv_rmt() { - periph_module_reset(rmt_periph_signals.groups[0].module); - periph_module_enable(rmt_periph_signals.groups[0].module); + periph_module_reset(rmt_periph_signals.groups[0].module); + periph_module_enable(rmt_periph_signals.groups[0].module); - rmt_ll_enable_periph_clock(&RMT, true); + rmt_ll_enable_periph_clock(&RMT, true); - // Divide 80MHz APB Clock by 8 -> .1us resolution delay - // idf >= 5.0 calculates the clock divider differently + // Divide 80MHz APB Clock by 8 -> .1us resolution delay + // idf >= 5.0 calculates the clock divider differently #if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 0, 0) - rmt_ll_set_group_clock_src(&RMT, RMT_CKV_CHAN, (rmt_clock_source_t)RMT_BASECLK_DEFAULT, 1, 0, 0); + rmt_ll_set_group_clock_src(&RMT, RMT_CKV_CHAN, RMT_CLK_SRC_DEFAULT, 1, 0, 0); #else - rmt_ll_set_group_clock_src(&RMT, RMT_CKV_CHAN, (rmt_clock_source_t)RMT_BASECLK_DEFAULT, 0, 0, 0); + rmt_ll_set_group_clock_src( + &RMT, RMT_CKV_CHAN, (rmt_clock_source_t)RMT_BASECLK_DEFAULT, 0, 0, 0 + ); #endif - rmt_ll_tx_set_channel_clock_div(&RMT, RMT_CKV_CHAN, 8); - rmt_ll_tx_set_mem_blocks(&RMT, RMT_CKV_CHAN, 2); - rmt_ll_enable_mem_access_nonfifo(&RMT, true); - rmt_ll_tx_fix_idle_level(&RMT, RMT_CKV_CHAN, RMT_IDLE_LEVEL_LOW, true); - rmt_ll_tx_enable_carrier_modulation(&RMT, RMT_CKV_CHAN, false); + rmt_ll_tx_set_channel_clock_div(&RMT, RMT_CKV_CHAN, 8); + rmt_ll_tx_set_mem_blocks(&RMT, RMT_CKV_CHAN, 2); + rmt_ll_enable_mem_access_nonfifo(&RMT, true); + rmt_ll_tx_fix_idle_level(&RMT, RMT_CKV_CHAN, RMT_IDLE_LEVEL_LOW, true); + rmt_ll_tx_enable_carrier_modulation(&RMT, RMT_CKV_CHAN, false); + + rmt_ll_tx_enable_loop(&RMT, RMT_CKV_CHAN, true); + + gpio_hal_iomux_func_sel(GPIO_PIN_MUX_REG[lcd.config.bus.ckv], PIN_FUNC_GPIO); + gpio_set_direction(lcd.config.bus.ckv, GPIO_MODE_OUTPUT); + esp_rom_gpio_connect_out_signal( + lcd.config.bus.ckv, rmt_periph_signals.groups[0].channels[RMT_CKV_CHAN].tx_sig, false, 0 + ); - rmt_ll_tx_enable_loop(&RMT, 
RMT_CKV_CHAN, true); + ckv_rmt_build_signal(); +} - gpio_hal_iomux_func_sel(GPIO_PIN_MUX_REG[lcd.config.bus.ckv], PIN_FUNC_GPIO); - gpio_set_direction(lcd.config.bus.ckv, GPIO_MODE_OUTPUT); - esp_rom_gpio_connect_out_signal(lcd.config.bus.ckv, rmt_periph_signals.groups[0].channels[RMT_CKV_CHAN].tx_sig, false, 0); +/** + * Reset the CKV RMT configuration. + */ +static void deinit_ckv_rmt() { + periph_module_reset(rmt_periph_signals.groups[0].module); + periph_module_disable(rmt_periph_signals.groups[0].module); - ckv_rmt_build_signal(); + gpio_reset_pin(lcd.config.bus.ckv); } -__attribute__((optimize("O3"))) -IRAM_ATTR static void lcd_isr_vsync(void *args) -{ +__attribute__((optimize("O3"))) IRAM_ATTR static void lcd_isr_vsync(void* args) { bool need_yield = false; uint32_t intr_status = lcd_ll_get_interrupt_status(lcd.hal.dev); lcd_ll_clear_interrupt_status(lcd.hal.dev, intr_status); if (intr_status & LCD_LL_EVENT_VSYNC_END) { - int batches_needed = vertical_lines / LINE_BATCH ; + int batches_needed = lcd.display_lines / LINE_BATCH; if (lcd.batches >= batches_needed) { lcd_ll_stop(lcd.hal.dev); - //rmt_ll_tx_stop(&RMT, RMT_CKV_CHAN); if (lcd.frame_done_cb != NULL) { (*lcd.frame_done_cb)(lcd.frame_cb_payload); } - //gpio_set_level(S3_LCD_PIN_NUM_MODE, 0); - } else { int ckv_cycles = 0; // last batch if (lcd.batches == batches_needed - 1) { lcd_ll_enable_auto_next_frame(lcd.hal.dev, false); - lcd_ll_set_vertical_timing(lcd.hal.dev, 1, 0, vertical_lines % LINE_BATCH, 10); - ckv_cycles = vertical_lines % LINE_BATCH + 10; + lcd_ll_set_vertical_timing(lcd.hal.dev, 1, 0, lcd.display_lines % LINE_BATCH, 10); + ckv_cycles = lcd.display_lines % LINE_BATCH + 10; } else { lcd_ll_set_vertical_timing(lcd.hal.dev, 1, 0, LINE_BATCH, 1); ckv_cycles = LINE_BATCH + 1; @@ -281,7 +248,6 @@ IRAM_ATTR static void lcd_isr_vsync(void *args) // skip the LCD front porch line, which is not actual data esp_rom_delay_us(lcd.line_length_us); - start_ckv_cycles(ckv_cycles); } @@ -294,22 +260,19 
@@ IRAM_ATTR static void lcd_isr_vsync(void *args) }; // ISR handling bounce buffer refill -static IRAM_ATTR bool lcd_rgb_panel_eof_handler(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data) -{ - dma_descriptor_t *desc = (dma_descriptor_t *)event_data->tx_eof_desc_addr; +static IRAM_ATTR bool lcd_rgb_panel_eof_handler( + gdma_channel_handle_t dma_chan, + gdma_event_data_t* event_data, + void* user_data +) { + dma_descriptor_t* desc = (dma_descriptor_t*)event_data->tx_eof_desc_addr; // Figure out which bounce buffer to write to. // Note: what we receive is the *last* descriptor of this bounce buffer. int bb = (desc == &lcd.dma_nodes[0]) ? 0 : 1; - - bool need_yield = fill_bounce_buffer(lcd.bounce_buffer[bb]); - - return need_yield; + return fill_bounce_buffer(lcd.bounce_buffer[bb]); } static esp_err_t init_dma_trans_link() { - - ESP_LOGI(TAG, "size: %d max: %d", lcd.bb_size, DMA_DESCRIPTOR_BUFFER_MAX_SIZE); - lcd.dma_nodes[0].dw0.suc_eof = 1; lcd.dma_nodes[0].dw0.size = lcd.bb_size; lcd.dma_nodes[0].dw0.length = lcd.bb_size; @@ -330,168 +293,227 @@ static esp_err_t init_dma_trans_link() { gdma_channel_alloc_config_t dma_chan_config = { .direction = GDMA_CHANNEL_DIRECTION_TX, }; - ESP_RETURN_ON_ERROR(gdma_new_channel(&dma_chan_config, &lcd.dma_chan), TAG, "alloc DMA channel failed"); - gdma_connect(lcd.dma_chan, GDMA_MAKE_TRIGGER(GDMA_TRIG_PERIPH_LCD, 0)); + ESP_RETURN_ON_ERROR( + gdma_new_channel(&dma_chan_config, &lcd.dma_chan), TAG, "alloc DMA channel failed" + ); + gdma_trigger_t trigger = GDMA_MAKE_TRIGGER(GDMA_TRIG_PERIPH_LCD, 0); + ESP_RETURN_ON_ERROR(gdma_connect(lcd.dma_chan, trigger), TAG, "dma connect error"); gdma_transfer_ability_t ability = { .psram_trans_align = 64, .sram_trans_align = 4, }; - gdma_set_transfer_ability(lcd.dma_chan, &ability); + ESP_RETURN_ON_ERROR(gdma_set_transfer_ability(lcd.dma_chan, &ability), TAG, "dma setup error"); gdma_tx_event_callbacks_t cbs = { .on_trans_eof = lcd_rgb_panel_eof_handler, }; - 
gdma_register_tx_event_callbacks(lcd.dma_chan, &cbs, NULL); + ESP_RETURN_ON_ERROR( + gdma_register_tx_event_callbacks(lcd.dma_chan, &cbs, NULL), TAG, "dma setup error" + ); return ESP_OK; } +void deinit_dma_trans_link() { + gdma_reset(lcd.dma_chan); + gdma_disconnect(lcd.dma_chan); + gdma_del_channel(lcd.dma_chan); +} -static esp_err_t s3_lcd_configure_gpio() -{ +/** + * Configure LCD peripheral and auxiliary GPIOs + */ +static esp_err_t init_bus_gpio() { const int DATA_LINES[16] = { - lcd.config.bus.data_14, - lcd.config.bus.data_15, - - lcd.config.bus.data_12, - lcd.config.bus.data_13, - - - lcd.config.bus.data_10, - lcd.config.bus.data_11, - - lcd.config.bus.data_8, - lcd.config.bus.data_9, - - - lcd.config.bus.data_6, - lcd.config.bus.data_7, - - lcd.config.bus.data_4, - lcd.config.bus.data_5, - - lcd.config.bus.data_2, - lcd.config.bus.data_3, - - - lcd.config.bus.data_0, - lcd.config.bus.data_1, + lcd.config.bus.data[14], lcd.config.bus.data[15], lcd.config.bus.data[12], + lcd.config.bus.data[13], lcd.config.bus.data[10], lcd.config.bus.data[11], + lcd.config.bus.data[8], lcd.config.bus.data[9], lcd.config.bus.data[6], + lcd.config.bus.data[7], lcd.config.bus.data[4], lcd.config.bus.data[5], + lcd.config.bus.data[2], lcd.config.bus.data[3], lcd.config.bus.data[0], + lcd.config.bus.data[1], }; // connect peripheral signals via GPIO matrix for (size_t i = (16 - lcd.config.bus_width); i < 16; i++) { gpio_hal_iomux_func_sel(GPIO_PIN_MUX_REG[DATA_LINES[i]], PIN_FUNC_GPIO); gpio_set_direction(DATA_LINES[i], GPIO_MODE_OUTPUT); - esp_rom_gpio_connect_out_signal(DATA_LINES[i], - lcd_periph_signals.panels[0].data_sigs[i], false, false); + esp_rom_gpio_connect_out_signal( + DATA_LINES[i], lcd_periph_signals.panels[0].data_sigs[i], false, false + ); } gpio_hal_iomux_func_sel(GPIO_PIN_MUX_REG[lcd.config.bus.leh], PIN_FUNC_GPIO); gpio_set_direction(lcd.config.bus.leh, GPIO_MODE_OUTPUT); - esp_rom_gpio_connect_out_signal(lcd.config.bus.leh, 
lcd_periph_signals.panels[0].hsync_sig, false, false); + esp_rom_gpio_connect_out_signal( + lcd.config.bus.leh, lcd_periph_signals.panels[0].hsync_sig, false, false + ); gpio_hal_iomux_func_sel(GPIO_PIN_MUX_REG[lcd.config.bus.clock], PIN_FUNC_GPIO); gpio_set_direction(lcd.config.bus.clock, GPIO_MODE_OUTPUT); - esp_rom_gpio_connect_out_signal(lcd.config.bus.clock, lcd_periph_signals.panels[0].pclk_sig, false, false); + esp_rom_gpio_connect_out_signal( + lcd.config.bus.clock, lcd_periph_signals.panels[0].pclk_sig, false, false + ); gpio_hal_iomux_func_sel(GPIO_PIN_MUX_REG[lcd.config.bus.start_pulse], PIN_FUNC_GPIO); gpio_set_direction(lcd.config.bus.start_pulse, GPIO_MODE_OUTPUT); - esp_rom_gpio_connect_out_signal(lcd.config.bus.start_pulse, lcd_periph_signals.panels[0].de_sig, false, false); + esp_rom_gpio_connect_out_signal( + lcd.config.bus.start_pulse, lcd_periph_signals.panels[0].de_sig, false, false + ); + + gpio_config_t vsync_gpio_conf = { + .mode = GPIO_MODE_OUTPUT, + .pin_bit_mask = 1ull << lcd.config.bus.stv, + }; + gpio_config(&vsync_gpio_conf); + gpio_set_level(lcd.config.bus.stv, 1); return ESP_OK; } -void IRAM_ATTR epd_lcd_init(const LcdEpdConfig_t* config, int display_width, int display_height) { +/** + * Reset bus GPIO pin functions. + */ +static void deinit_bus_gpio() { + for (size_t i = (16 - lcd.config.bus_width); i < 16; i++) { + gpio_reset_pin(lcd.config.bus.data[i]); + } - memcpy(&lcd.config, config, sizeof(LcdEpdConfig_t)); + gpio_reset_pin(lcd.config.bus.leh); + gpio_reset_pin(lcd.config.bus.clock); + gpio_reset_pin(lcd.config.bus.start_pulse); + gpio_reset_pin(lcd.config.bus.stv); +} +/** + * Check if the PSRAM cache is properly configured. + */ +static void check_cache_configuration() { if (CONFIG_ESP32S3_DATA_CACHE_LINE_SIZE < 64) { - ESP_LOGE("epdiy", "cache line size is set to %d (< 64B)! 
This will degrade performance, please update this option in menuconfig."); - ESP_LOGE("epdiy", "If you are on arduino, you can't set this option yourself, you'll need to use a lower speed."); - ESP_LOGE("epdiy", "Reducing the pixel clock from %d to %d for now!", config->pixel_clock / 1000 / 1000, config->pixel_clock / 1000 / 1000 / 2); + ESP_LOGE( + "epdiy", + "cache line size is set to %d (< 64B)! This will degrade performance, please update " + "this option in menuconfig.", + CONFIG_ESP32S3_DATA_CACHE_LINE_SIZE + ); + ESP_LOGE( + "epdiy", + "If you are on arduino, you can't set this option yourself, you'll need to use a lower " + "speed." + ); + ESP_LOGE( + "epdiy", "Reducing the pixel clock from %d MHz to %d MHz for now!", + lcd.config.pixel_clock / 1000 / 1000, lcd.config.pixel_clock / 1000 / 1000 / 2 + ); lcd.config.pixel_clock = lcd.config.pixel_clock / 2; - // fixme: this would be nice, but doesn't work :( - //uint32_t d_autoload = Cache_Suspend_DCache(); - ///Cache_Set_DCache_Mode(CACHE_SIZE_FULL, CACHE_4WAYS_ASSOC, CACHE_LINE_SIZE_32B); - //Cache_Invalidate_DCache_All(); - //Cache_Resume_DCache(d_autoload); + // uint32_t d_autoload = Cache_Suspend_DCache(); + /// Cache_Set_DCache_Mode(CACHE_SIZE_FULL, CACHE_4WAYS_ASSOC, CACHE_LINE_SIZE_32B); + // Cache_Invalidate_DCache_All(); + // Cache_Resume_DCache(d_autoload); } +} - // assign globals - line_bytes = display_width / 4; - // Make sure the bounce buffers divide the display height evenly. - vertical_lines = (((display_height + 7) / 8) * 8); - - esp_err_t ret = ESP_OK; +/** + * Assign LCD configuration parameters from a given configuration, without allocating memory or + * touching the LCD peripheral config. 
+ */ +static void assign_lcd_parameters_from_config( + const LcdEpdConfig_t* config, + int display_width, + int display_height +) { + // copy over the configuraiton object + memcpy(&lcd.config, config, sizeof(LcdEpdConfig_t)); - lcd.lcd_res_h = line_bytes / (lcd.config.bus_width / 8); + // Make sure the bounce buffers divide the display height evenly. + lcd.display_lines = (((display_height + 7) / 8) * 8); - gpio_config_t vsync_gpio_conf = { - .mode = GPIO_MODE_OUTPUT, - .pin_bit_mask = 1ull << lcd.config.bus.stv, - }; + lcd.line_bytes = display_width / 4; + lcd.lcd_res_h = lcd.line_bytes / (lcd.config.bus_width / 8); - gpio_config(&vsync_gpio_conf); + // With 8 bit bus width, we need a dummy cycle before the actual data, + // because the LCD peripheral behaves weirdly. + // Also see: + // https://blog.adafruit.com/2022/06/14/esp32uesday-hacking-the-esp32-s3-lcd-peripheral/ + lcd.dummy_bytes = lcd.config.bus_width / 8; - gpio_set_level(lcd.config.bus.stv, 1); - //gpio_set_level(S3_LCD_PIN_NUM_MODE, 0); + // each bounce buffer holds a number of lines with data + dummy bytes each + lcd.bb_size = BOUNCE_BUF_LINES * (lcd.line_bytes + lcd.dummy_bytes); - ESP_LOGI(TAG, "using resolution %dx%d", lcd.lcd_res_h, vertical_lines); + check_cache_configuration(); - // enable APB to access LCD registers - periph_module_enable(lcd_periph_signals.panels[0].module); - periph_module_reset(lcd_periph_signals.panels[0].module); + ESP_LOGI(TAG, "using resolution %dx%d", lcd.lcd_res_h, lcd.display_lines); +} - // each bounce buffer holds two lines of display data +/** + * Allocate buffers for LCD driver operation. + */ +static esp_err_t allocate_lcd_buffers() { + uint32_t dma_flags = MALLOC_CAP_INTERNAL | MALLOC_CAP_DMA; - // With 8 bit bus width, we need a dummy cycle before the actual data, - // because the LCD peripheral behaves weirdly. 
- // Also see: - // https://blog.adafruit.com/2022/06/14/esp32uesday-hacking-the-esp32-s3-lcd-peripheral/ - int dummy_bytes = lcd.config.bus_width / 8; + // allocate bounce buffers + for (int i = 0; i < 2; i++) { + lcd.bounce_buffer[i] = heap_caps_aligned_calloc(4, 1, lcd.bb_size, dma_flags); + ESP_RETURN_ON_FALSE(lcd.bounce_buffer[i], ESP_ERR_NO_MEM, TAG, "install interrupt failed"); + } - lcd.bb_size = BOUNCE_BUF_LINES * (line_bytes + dummy_bytes); - //assert(lcd.bb_size % (line_bytes) == 1); - size_t num_dma_nodes = (lcd.bb_size + DMA_DESCRIPTOR_BUFFER_MAX_SIZE - 1) / DMA_DESCRIPTOR_BUFFER_MAX_SIZE; - ESP_LOGI(TAG, "num dma nodes: %u", num_dma_nodes); - lcd.dma_nodes = heap_caps_calloc(1, num_dma_nodes * sizeof(dma_descriptor_t) * 2, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); - ESP_GOTO_ON_FALSE(lcd.dma_nodes, ESP_ERR_NO_MEM, err, TAG, "no mem for rgb panel"); + // So far, I haven't seen any displays with > 4096 pixels per line, + // so we only need one DMA node for now. + assert(lcd.bb_size < DMA_DESCRIPTOR_BUFFER_MAX_SIZE); + lcd.dma_nodes = heap_caps_calloc(1, sizeof(dma_descriptor_t) * 2, dma_flags); + ESP_RETURN_ON_FALSE(lcd.dma_nodes, ESP_ERR_NO_MEM, TAG, "no mem for dma nodes"); + return ESP_OK; +} - // alloc bounce buffer +static void free_lcd_buffers() { for (int i = 0; i < 2; i++) { - // bounce buffer must come from SRAM - lcd.bounce_buffer[i] = heap_caps_aligned_calloc(4, 1, lcd.bb_size, MALLOC_CAP_INTERNAL | MALLOC_CAP_DMA); - ESP_GOTO_ON_ERROR(ret, err, TAG, "install interrupt failed"); + uint8_t* buf = lcd.bounce_buffer[i]; + if (buf != NULL) { + heap_caps_free(buf); + lcd.bounce_buffer[i] = NULL; + } + } + + if (lcd.dma_nodes != NULL) { + heap_caps_free(lcd.dma_nodes); + lcd.dma_nodes = NULL; } +} + +/** + * Initialize the LCD peripheral itself and install interrupts. 
+ */ +static esp_err_t init_lcd_peripheral() { + esp_err_t ret = ESP_OK; + + // enable APB to access LCD registers + periph_module_enable(lcd_periph_signals.panels[0].module); + periph_module_reset(lcd_periph_signals.panels[0].module); lcd_hal_init(&lcd.hal, 0); lcd_ll_enable_clock(lcd.hal.dev, true); lcd_ll_select_clk_src(lcd.hal.dev, LCD_CLK_SRC_PLL240M); - ESP_GOTO_ON_ERROR(ret, err, TAG, "set source clock failed"); - - // install interrupt service, (LCD peripheral shares the interrupt source with Camera by different mask) - int isr_flags = (ESP_INTR_FLAG_IRAM | ESP_INTR_FLAG_INTRDISABLED) | ESP_INTR_FLAG_SHARED | ESP_INTR_FLAG_LOWMED; - ret = esp_intr_alloc_intrstatus(lcd_periph_signals.panels[0].irq_id, isr_flags, - (uint32_t)lcd_ll_get_interrupt_status_reg(lcd.hal.dev), - LCD_LL_EVENT_VSYNC_END, lcd_isr_vsync, NULL, &lcd.vsync_intr); - ESP_GOTO_ON_ERROR(ret, err, TAG, "install interrupt failed"); - ret = esp_intr_alloc_intrstatus(lcd_periph_signals.panels[0].irq_id, isr_flags, - (uint32_t)lcd_ll_get_interrupt_status_reg(lcd.hal.dev), - LCD_LL_EVENT_TRANS_DONE, lcd_isr_vsync, NULL, &lcd.done_intr); + ESP_RETURN_ON_ERROR(ret, TAG, "set source clock failed"); + + // install interrupt service, (LCD peripheral shares the interrupt source with Camera by + // different mask) + int flags = ESP_INTR_FLAG_IRAM | ESP_INTR_FLAG_INTRDISABLED | ESP_INTR_FLAG_SHARED | + ESP_INTR_FLAG_LOWMED; + int source = lcd_periph_signals.panels[0].irq_id; + uint32_t status = (uint32_t)lcd_ll_get_interrupt_status_reg(lcd.hal.dev); + ret = esp_intr_alloc_intrstatus( + source, flags, status, LCD_LL_EVENT_VSYNC_END, lcd_isr_vsync, NULL, &lcd.vsync_intr + ); + ESP_RETURN_ON_ERROR(ret, TAG, "install interrupt failed"); - ESP_GOTO_ON_ERROR(ret, err, TAG, "install interrupt failed"); + status = (uint32_t)lcd_ll_get_interrupt_status_reg(lcd.hal.dev); + ret = esp_intr_alloc_intrstatus( + source, flags, status, LCD_LL_EVENT_TRANS_DONE, lcd_isr_vsync, NULL, &lcd.done_intr + ); + 
ESP_RETURN_ON_ERROR(ret, TAG, "install interrupt failed"); lcd_ll_fifo_reset(lcd.hal.dev); lcd_ll_reset(lcd.hal.dev); - - // install DMA service - ret = init_dma_trans_link(); - ESP_GOTO_ON_ERROR(ret, err, TAG, "install DMA failed"); - - ret = s3_lcd_configure_gpio(); - ESP_GOTO_ON_ERROR(ret, err, TAG, "configure GPIO failed"); - // pixel clock phase and polarity lcd_ll_set_clock_idle_level(lcd.hal.dev, false); lcd_ll_set_pixel_clock_edge(lcd.hal.dev, false); @@ -499,8 +521,8 @@ void IRAM_ATTR epd_lcd_init(const LcdEpdConfig_t* config, int display_width, int // enable RGB mode and set data width lcd_ll_enable_rgb_mode(lcd.hal.dev, true); lcd_ll_set_data_width(lcd.hal.dev, lcd.config.bus_width); - lcd_ll_set_phase_cycles(lcd.hal.dev, 0, (dummy_bytes > 0), 1); // enable data phase only - lcd_ll_enable_output_hsync_in_porch_region(lcd.hal.dev, false); // enable data phase only + lcd_ll_set_phase_cycles(lcd.hal.dev, 0, (lcd.dummy_bytes > 0), 1); // enable data phase only + lcd_ll_enable_output_hsync_in_porch_region(lcd.hal.dev, false); // enable data phase only // number of data cycles is controlled by DMA buffer size lcd_ll_enable_output_always_on(lcd.hal.dev, false); @@ -521,22 +543,71 @@ void IRAM_ATTR epd_lcd_init(const LcdEpdConfig_t* config, int display_width, int // enable intr esp_intr_enable(lcd.vsync_intr); esp_intr_enable(lcd.done_intr); + return ret; +} + +static void deinit_lcd_peripheral() { + // disable and free interrupts + esp_intr_disable(lcd.vsync_intr); + esp_intr_disable(lcd.done_intr); + esp_intr_free(lcd.vsync_intr); + esp_intr_free(lcd.done_intr); + + lcd_ll_fifo_reset(lcd.hal.dev); + lcd_ll_reset(lcd.hal.dev); + + periph_module_reset(lcd_periph_signals.panels[0].module); + periph_module_disable(lcd_periph_signals.panels[0].module); +} + +/** + * Configure the LCD driver for epdiy. 
+ */ +void epd_lcd_init(const LcdEpdConfig_t* config, int display_width, int display_height) { + esp_err_t ret = ESP_OK; + assign_lcd_parameters_from_config(config, display_width, display_height); + + check_cache_configuration(); + + ret = allocate_lcd_buffers(); + ESP_GOTO_ON_ERROR(ret, err, TAG, "lcd buffer allocation failed"); + + ret = init_lcd_peripheral(); + ESP_GOTO_ON_ERROR(ret, err, TAG, "lcd peripheral init failed"); + + ret = init_dma_trans_link(); + ESP_GOTO_ON_ERROR(ret, err, TAG, "install DMA failed"); + + ret = init_bus_gpio(); + ESP_GOTO_ON_ERROR(ret, err, TAG, "configure GPIO failed"); init_ckv_rmt(); + // setup driver state epd_lcd_set_pixel_clock_MHz(lcd.config.pixel_clock / 1000 / 1000); - epd_lcd_line_source_cb(NULL, NULL); ESP_LOGI(TAG, "LCD init done."); - return; err: - // do some deconstruction - ESP_LOGI(TAG, "LCD initialization failed!"); + ESP_LOGE(TAG, "LCD initialization failed!"); abort(); } +/** + * Deinitializue the LCD driver, i.e., free resources and peripherals. 
+ */ +void epd_lcd_deinit() { + epd_lcd_line_source_cb(NULL, NULL); + + deinit_bus_gpio(); + deinit_lcd_peripheral(); + deinit_dma_trans_link(); + free_lcd_buffers(); + deinit_ckv_rmt(); + + ESP_LOGI(TAG, "LCD deinitialized."); +} void epd_lcd_set_pixel_clock_MHz(int frequency) { lcd.config.pixel_clock = frequency * 1000 * 1000; @@ -545,13 +616,66 @@ void epd_lcd_set_pixel_clock_MHz(int frequency) { int flags = 0; uint32_t freq = lcd_hal_cal_pclk_freq(&lcd.hal, 240000000, lcd.config.pixel_clock, flags); ESP_LOGI(TAG, "pclk freq: %d Hz", freq); - lcd.line_length_us = (lcd.lcd_res_h + lcd.config.le_high_time + lcd.config.line_front_porch - 1) * 1000000 / lcd.config.pixel_clock + 1; + lcd.line_length_us = + (lcd.lcd_res_h + lcd.config.le_high_time + lcd.config.line_front_porch - 1) * 1000000 / + lcd.config.pixel_clock + + 1; lcd.line_cycles = lcd.line_length_us * lcd.config.pixel_clock / 1000000; ESP_LOGI(TAG, "line width: %dus, %d cylces", lcd.line_length_us, lcd.line_cycles); ckv_rmt_build_signal(); } +void IRAM_ATTR epd_lcd_start_frame() { + int initial_lines = min(LINE_BATCH, lcd.display_lines); + + // hsync: pulse with, back porch, active width, front porch + int end_line = + lcd.line_cycles - lcd.lcd_res_h - lcd.config.le_high_time - lcd.config.line_front_porch; + lcd_ll_set_horizontal_timing( + lcd.hal.dev, lcd.config.le_high_time - (lcd.dummy_bytes > 0), lcd.config.line_front_porch, + // a dummy byte is neeed in 8 bit mode to work around LCD peculiarities + lcd.lcd_res_h + (lcd.dummy_bytes > 0), end_line + ); + lcd_ll_set_vertical_timing(lcd.hal.dev, 1, 1, initial_lines, 1); + + // generate the hsync at the very beginning of line + lcd_ll_set_hsync_position(lcd.hal.dev, 1); + + // reset FIFO of DMA and LCD, incase there remains old frame data + gdma_reset(lcd.dma_chan); + lcd_ll_stop(lcd.hal.dev); + lcd_ll_fifo_reset(lcd.hal.dev); + lcd_ll_enable_auto_next_frame(lcd.hal.dev, true); + + lcd.batches = 0; + fill_bounce_buffer(lcd.bounce_buffer[0]); + 
fill_bounce_buffer(lcd.bounce_buffer[1]); + + // the start of DMA should be prior to the start of LCD engine + gdma_start(lcd.dma_chan, (intptr_t)&lcd.dma_nodes[0]); + + // enter a critical section to ensure the frame start timing is correct + taskENTER_CRITICAL(&frame_start_spinlock); + + // delay 1us is sufficient for DMA to pass data to LCD FIFO + // in fact, this is only needed when LCD pixel clock is set too high + gpio_set_level(lcd.config.bus.stv, 0); + // esp_rom_delay_us(1); + // for picture clarity, it seems to be important to start CKV at a "good" + // time, seemingly start or towards end of line. + start_ckv_cycles(initial_lines + 5); + esp_rom_delay_us(lcd.line_length_us); + gpio_set_level(lcd.config.bus.stv, 1); + esp_rom_delay_us(lcd.line_length_us); + esp_rom_delay_us(lcd.config.ckv_high_time / 10); + + // start LCD engine + lcd_ll_start(lcd.hal.dev); + + taskEXIT_CRITICAL(&frame_start_spinlock); +} + #else /// Dummy implementation to link on the old ESP32 @@ -559,4 +683,4 @@ void epd_lcd_init(const LcdEpdConfig_t* config, int display_width, int display_h assert(false); } -#endif // S3 Target +#endif // S3 Target diff --git a/src/output_lcd/lcd_driver.h b/src/output_lcd/lcd_driver.h index a84e03c3..26e5ca86 100644 --- a/src/output_lcd/lcd_driver.h +++ b/src/output_lcd/lcd_driver.h @@ -11,24 +11,7 @@ */ typedef struct { // GPIO numbers of the parallel bus pins. - gpio_num_t data_0; - gpio_num_t data_1; - gpio_num_t data_2; - gpio_num_t data_3; - gpio_num_t data_4; - gpio_num_t data_5; - gpio_num_t data_6; - gpio_num_t data_7; - - - gpio_num_t data_8; - gpio_num_t data_9; - gpio_num_t data_10; - gpio_num_t data_11; - gpio_num_t data_12; - gpio_num_t data_13; - gpio_num_t data_14; - gpio_num_t data_15; + gpio_num_t data[16]; // horizontal clock pin. 
gpio_num_t clock; @@ -58,6 +41,7 @@ typedef bool(*line_cb_func_t)(void*, uint8_t*); typedef void(*frame_done_func_t)(void*); void epd_lcd_init(const LcdEpdConfig_t* config, int display_width, int display_height); +void epd_lcd_deinit(); void epd_lcd_frame_done_cb(frame_done_func_t, void* payload); void epd_lcd_line_source_cb(line_cb_func_t, void* payload); void epd_lcd_start_frame(); diff --git a/src/render.c b/src/render.c index 34559bd2..ffe73962 100644 --- a/src/render.c +++ b/src/render.c @@ -1,35 +1,44 @@ #include "render.h" -#include "epdiy.h" #include "epd_board.h" #include "epd_internals.h" +#include "epdiy.h" -#include -#include -#include -#include +#include #include +#include #include #include #include #include +#include +#include +#include +#include +#include +#include "output_common/line_queue.h" +#include "output_common/lut.h" #include "output_common/render_context.h" #include "output_common/render_method.h" -#include "output_lcd/render_lcd.h" #include "output_i2s/render_i2s.h" +#include "output_lcd/render_lcd.h" -static inline int min(int x, int y) { return x < y ? x : y; } -static inline int max(int x, int y) { return x > y ? x : y; } +static inline int min(int x, int y) { + return x < y ? x : y; +} +static inline int max(int x, int y) { + return x > y ? x : y; +} const int clear_cycle_time = 12; -#define RTOS_ERROR_CHECK(x) \ - do { \ - esp_err_t __err_rc = (x); \ - if (__err_rc != pdPASS) { \ - abort(); \ - } \ +#define RTOS_ERROR_CHECK(x) \ + do { \ + esp_err_t __err_rc = (x); \ + if (__err_rc != pdPASS) { \ + abort(); \ + } \ } while (0) static RenderContext_t render_context; @@ -51,20 +60,18 @@ void epd_push_pixels(EpdRect area, short time, int color) { * closest one. * Returns -1 if the waveform does not contain any temperature range. 
*/ -int waveform_temp_range_index(const EpdWaveform *waveform, int temperature) { +int waveform_temp_range_index(const EpdWaveform* waveform, int temperature) { int idx = 0; if (waveform->num_temp_ranges == 0) { return -1; } - while (idx < waveform->num_temp_ranges - 1 && - waveform->temp_intervals[idx].min < temperature) { + while (idx < waveform->num_temp_ranges - 1 && waveform->temp_intervals[idx].min < temperature) { idx++; } return idx; } -static int get_waveform_index(const EpdWaveform *waveform, - enum EpdDrawMode mode) { +static int get_waveform_index(const EpdWaveform* waveform, enum EpdDrawMode mode) { for (int i = 0; i < waveform->num_modes; i++) { if (waveform->mode_data[i]->type == (mode & 0x3F)) { return i; @@ -80,12 +87,42 @@ static inline int rounded_display_height() { return (((epd_height() + 7) / 8) * 8); } +/** + * Populate an output line mask from line dirtyness with one nibble per pixel. + * If the dirtyness data is NULL, set the mask to neutral. + * + * don't inline for to ensure availability in tests. + */ +void __attribute__((noinline)) +_epd_populate_line_mask(uint8_t* line_mask, const uint8_t* dirty_columns, int mask_len) { + if (dirty_columns == NULL) { + memset(line_mask, 0xFF, mask_len); + } else { + int pixels = mask_len * 4; + for (int c = 0; c < pixels / 2; c += 2) { + uint8_t mask = 0; + mask |= (dirty_columns[c + 1] & 0xF0) != 0 ? 0xC0 : 0x00; + mask |= (dirty_columns[c + 1] & 0x0F) != 0 ? 0x30 : 0x00; + mask |= (dirty_columns[c] & 0xF0) != 0 ? 0x0C : 0x00; + mask |= (dirty_columns[c] & 0x0F) != 0 ? 
0x03 : 0x00; + line_mask[c / 2] = mask; + } + } +} + // FIXME: fix misleading naming: // area -> buffer dimensions // crop -> area taken out of buffer enum EpdDrawError IRAM_ATTR epd_draw_base( - EpdRect area, const uint8_t *data, EpdRect crop_to, enum EpdDrawMode mode, - int temperature, const bool *drawn_lines, const EpdWaveform *waveform) { + EpdRect area, + const uint8_t* data, + EpdRect crop_to, + enum EpdDrawMode mode, + int temperature, + const bool* drawn_lines, + const uint8_t* drawn_columns, + const EpdWaveform* waveform +) { if (waveform == NULL) { return EPD_DRAW_NO_PHASES_AVAILABLE; } @@ -95,7 +132,7 @@ enum EpdDrawError IRAM_ATTR epd_draw_base( } int waveform_index = 0; uint8_t frame_count = 0; - const EpdWaveformPhases *waveform_phases = NULL; + const EpdWaveformPhases* waveform_phases = NULL; // no waveform required for monochrome mode if (!(mode & MODE_EPDIY_MONOCHROME)) { @@ -104,8 +141,7 @@ enum EpdDrawError IRAM_ATTR epd_draw_base( return EPD_DRAW_MODE_NOT_FOUND; } - waveform_phases = - waveform->mode_data[waveform_index]->range_data[waveform_range]; + waveform_phases = waveform->mode_data[waveform_index]->range_data[waveform_range]; // FIXME: error if not present frame_count = waveform_phases->phases; } else { @@ -122,6 +158,16 @@ enum EpdDrawError IRAM_ATTR epd_draw_base( return EPD_DRAW_INVALID_CROP; } +#ifdef RENDER_METHOD_LCD + if (mode & MODE_PACKING_1PPB_DIFFERENCE && render_context.conversion_lut_size > 1 << 10) { + ESP_LOGI( + "epdiy", + "Using optimized vector implementation on the ESP32-S3, only 1k of %d LUT in use!", + render_context.conversion_lut_size + ); + } +#endif + render_context.area = area; render_context.crop_to = crop_to; render_context.waveform_range = waveform_range; @@ -142,21 +188,27 @@ enum EpdDrawError IRAM_ATTR epd_draw_base( render_context.phase_times = waveform_phases->phase_times; } - ESP_LOGI("epdiy", "starting update, phases: %d", frame_count); - #ifdef RENDER_METHOD_I2S i2s_do_update(&render_context); #elif 
defined(RENDER_METHOD_LCD) + for (int i = 0; i < NUM_RENDER_THREADS; i++) { + LineQueue_t* queue = &render_context.line_queues[i]; + _epd_populate_line_mask(queue->mask_buffer, drawn_columns, queue->mask_buffer_len); + } + lcd_do_update(&render_context); #endif + if (render_context.error & EPD_DRAW_EMPTY_LINE_QUEUE) { + ESP_LOGE("epdiy", "line buffer underrun occurred!"); + } + if (render_context.error != EPD_DRAW_SUCCESS) { return render_context.error; } return EPD_DRAW_SUCCESS; } - static void IRAM_ATTR render_thread(void* arg) { int thread_id = (int)arg; @@ -201,7 +253,7 @@ void epd_clear_area_cycles(EpdRect area, int cycles, int cycle_time) { void epd_renderer_init(enum EpdInitOptions options) { // Either the board should be set in menuconfig or the epd_set_board() must // be called before epd_init() - assert(epd_current_board() != NULL); + assert((epd_current_board() != NULL)); epd_current_board()->init(epd_width()); epd_control_reg_init(); @@ -212,16 +264,22 @@ void epd_renderer_init(enum EpdInitOptions options) { size_t lut_size = 0; if (options & EPD_LUT_1K) { lut_size = 1 << 10; - } else if ((options & EPD_LUT_64K) || (options == EPD_OPTIONS_DEFAULT)) { + } else if (options & EPD_LUT_64K) { lut_size = 1 << 16; + } else if (options == EPD_OPTIONS_DEFAULT) { +#ifdef RENDER_METHOD_LCD + lut_size = 1 << 10; +#else + lut_size = 1 << 16; +#endif } else { ESP_LOGE("epd", "invalid init options: %d", options); return; } - ESP_LOGE("epd", "lut size: %d", lut_size); - render_context.conversion_lut = (uint8_t *)heap_caps_malloc( - lut_size, MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL); + ESP_LOGI("epd", "Space used for waveform LUT: %dK", lut_size / 1024); + render_context.conversion_lut = + (uint8_t*)heap_caps_malloc(lut_size, MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL); if (render_context.conversion_lut == NULL) { ESP_LOGE("epd", "could not allocate LUT!"); abort(); @@ -236,8 +294,8 @@ void epd_renderer_init(enum EpdInitOptions options) { // When using the LCD peripheral, we 
may need padding lines to // satisfy the bounce buffer size requirements - render_context.line_threads = (uint8_t *)heap_caps_malloc( - rounded_display_height(), MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL); + render_context.line_threads = + (uint8_t*)heap_caps_malloc(rounded_display_height(), MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL); int queue_len = 32; if (options & EPD_FEED_QUEUE_32) { @@ -246,38 +304,43 @@ void epd_renderer_init(enum EpdInitOptions options) { queue_len = 8; } + if (render_context.conversion_lut == NULL) { + ESP_LOGE("epd", "could not allocate line mask!"); + abort(); + } + #ifdef RENDER_METHOD_LCD - size_t queue_elem_size = epd_width() / 4; + bool use_lq_mask = true; + size_t queue_elem_size = render_context.display_width / 4; #elif defined(RENDER_METHOD_I2S) + bool use_lq_mask = false; size_t queue_elem_size = epd_width(); #endif for (int i = 0; i < NUM_RENDER_THREADS; i++) { - render_context.line_queues[i].size = queue_len; - render_context.line_queues[i].element_size = queue_elem_size; - render_context.line_queues[i].current = 0; - render_context.line_queues[i].last = 0; - render_context.line_queues[i].buf = (uint8_t *)heap_caps_malloc( - queue_len * queue_elem_size, MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL); - assert(render_context.line_queues[i].buf != NULL); - render_context.feed_line_buffers[i] = (uint8_t *)heap_caps_malloc(render_context.display_width, MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL); + render_context.line_queues[i] = lq_init(queue_len, queue_elem_size, use_lq_mask); + render_context.feed_line_buffers[i] = (uint8_t*)heap_caps_malloc( + render_context.display_width, MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL + ); assert(render_context.feed_line_buffers[i] != NULL); RTOS_ERROR_CHECK(xTaskCreatePinnedToCore( - render_thread, "epd_prep", 1 << 11, (void *)i, - configMAX_PRIORITIES, &render_context.feed_tasks[i], i)); - if (render_context.line_queues[i].buf == NULL) { - ESP_LOGE("epd", "could not allocate line queue!"); - abort(); - } + render_thread, 
"epd_prep", 1 << 12, (void*)i, configMAX_PRIORITIES, + &render_context.feed_tasks[i], i + )); } } - void epd_renderer_deinit() { const EpdBoardDefinition* epd_board = epd_current_board(); epd_board->poweroff(epd_ctrl_state()); + for (int i = 0; i < NUM_RENDER_THREADS; i++) { + vTaskDelete(render_context.feed_tasks[i]); + lq_free(&render_context.line_queues[i]); + vSemaphoreDelete(render_context.feed_done_smphr[i]); + } + #ifdef RENDER_METHOD_I2S i2s_deinit(); #endif @@ -288,57 +351,131 @@ void epd_renderer_deinit() { epd_board->deinit(); } - for (int i = 0; i < NUM_RENDER_THREADS; i++) { - free(render_context.line_queues[i].buf); - free(render_context.feed_line_buffers[i]); - vSemaphoreDelete(render_context.feed_done_smphr[i]); - vTaskDelete(render_context.feed_tasks[i]); - } - free(render_context.conversion_lut); free(render_context.line_threads); vSemaphoreDelete(render_context.frame_done); } -EpdRect epd_difference_image_base(const uint8_t *to, const uint8_t *from, - EpdRect crop_to, int fb_width, int fb_height, - uint8_t *interlaced, bool *dirty_lines, - uint8_t *from_or, uint8_t *from_and) { - assert(from_or != NULL); - assert(from_and != NULL); - // OR over all pixels of the "from"-image - *from_or = 0x00; - // AND over all pixels of the "from"-image - *from_and = 0x0F; +#ifdef RENDER_METHOD_LCD +uint32_t epd_interlace_4bpp_line_VE( + const uint8_t* to, + const uint8_t* from, + uint8_t* interlaced, + uint8_t* col_dirtyness, + int fb_width +); +#endif - uint8_t* dirty_cols = calloc(epd_width(), 1); - assert (dirty_cols != NULL); +/** + * Interlaces `len` nibbles from the buffers `to` and `from` into `interlaced`. + * In the process, tracks which nibbles differ in `col_dirtyness`. + * Returns `1` if there are differences, `0` otherwise. + * Does not require special alignment of the buffers beyond 32 bit alignment. 
+ */ +__attribute__((optimize("O3"))) +static inline int _interlace_line_unaligned( + const uint8_t* to, + const uint8_t* from, + uint8_t* interlaced, + uint8_t* col_dirtyness, + int len +) { + int dirty = 0; + for (int x = 0; x < len; x++) { + uint8_t t = *(to + x / 2); + uint8_t f = *(from + x / 2); + t = (x % 2) ? (t >> 4) : (t & 0x0f); + f = (x % 2) ? (f >> 4) : (f & 0x0f); + col_dirtyness[x / 2] |= (t ^ f) << (4 * (x % 2)); + dirty |= (t ^ f); + interlaced[x] = (t << 4) | f; + } + return dirty; +} + +/** + * Interlaces the lines at `to`, `from` into `interlaced`. + * returns `1` if there are differences, `0` otherwise. + */ +__attribute__((optimize("O3"))) +bool _epd_interlace_line( + const uint8_t* to, + const uint8_t* from, + uint8_t* interlaced, + uint8_t* col_dirtyness, + int fb_width +) { +#ifdef RENDER_METHOD_I2S + return _interlace_line_unaligned(to, from, interlaced, col_dirtyness, fb_width) > 0; +#elif defined(RENDER_METHOD_LCD) + // Use Vector Extensions with the ESP32-S3. + // Both input buffers should have the same alignment w.r.t. 16 bytes, + // as asserted in epd_difference_image_base. 
+ uint32_t dirty = 0; + + // alignment boundaries in pixels + int unaligned_len_front_px = ((16 - (uint32_t)to % 16) * 2) % 32; + int unaligned_len_back_px = (((uint32_t)to + fb_width / 2) % 16) * 2; + int unaligned_back_start_px = fb_width - unaligned_len_back_px; + int aligned_len_px = fb_width - unaligned_len_front_px - unaligned_len_back_px; + + dirty |= _interlace_line_unaligned(to, from, interlaced, col_dirtyness, unaligned_len_front_px); + dirty |= epd_interlace_4bpp_line_VE( + to + unaligned_len_front_px / 2, from + unaligned_len_front_px / 2, + interlaced + unaligned_len_front_px, col_dirtyness + unaligned_len_front_px / 2, + aligned_len_px + ); + dirty |= _interlace_line_unaligned( + to + unaligned_back_start_px / 2, from + unaligned_back_start_px / 2, + interlaced + unaligned_back_start_px, col_dirtyness + unaligned_back_start_px / 2, + unaligned_len_back_px + ); + return dirty; +#endif +} + +EpdRect epd_difference_image_base( + const uint8_t* to, + const uint8_t* from, + EpdRect crop_to, + int fb_width, + int fb_height, + uint8_t* interlaced, + bool* dirty_lines, + uint8_t* col_dirtyness +) { + assert(fb_width % 8 == 0); + assert(col_dirtyness != NULL); + + // these buffers should be allocated 16 byte aligned + assert((uint32_t)to % 16 == 0); + assert((uint32_t)from % 16 == 0); + assert((uint32_t)col_dirtyness % 16 == 0); + assert((uint32_t)interlaced % 16 == 0); + + memset(col_dirtyness, 0, fb_width / 2); + memset(dirty_lines, 0, sizeof(bool) * fb_height); int x_end = min(fb_width, crop_to.x + crop_to.width); int y_end = min(fb_height, crop_to.y + crop_to.height); for (int y = crop_to.y; y < y_end; y++) { - uint8_t dirty = 0; - for (int x = crop_to.x; x < x_end; x++) { - uint8_t t = *(to + y * fb_width / 2 + x / 2); - t = (x % 2) ? (t >> 4) : (t & 0x0f); - uint8_t f = *(from + y * fb_width / 2 + x / 2); - f = (x % 2) ? 
(f >> 4) : (f & 0x0f); - *from_or |= f; - *from_and &= f; - dirty |= (t ^ f); - dirty_cols[x] |= (t ^ f); - interlaced[y * fb_width + x] = (t << 4) | f; - } - dirty_lines[y] = dirty > 0; + uint32_t offset = y * fb_width / 2; + int dirty = _epd_interlace_line( + to + offset, from + offset, interlaced + offset * 2, col_dirtyness, fb_width + ); + dirty_lines[y] = dirty; } + int min_x, min_y, max_x, max_y; for (min_x = crop_to.x; min_x < x_end; min_x++) { - if (dirty_cols[min_x] != 0) + uint8_t mask = min_x % 2 ? 0xF0 : 0x0F; + if ((col_dirtyness[min_x / 2] & mask) != 0) break; } for (max_x = x_end - 1; max_x >= crop_to.x; max_x--) { - if (dirty_cols[max_x] != 0) + uint8_t mask = min_x % 2 ? 0xF0 : 0x0F; + if ((col_dirtyness[max_x / 2] & mask) != 0) break; } for (min_y = crop_to.y; min_y < y_end; min_y++) { @@ -349,6 +486,7 @@ EpdRect epd_difference_image_base(const uint8_t *to, const uint8_t *from, if (dirty_lines[max_y] != 0) break; } + EpdRect crop_rect = { .x = min_x, .y = min_y, @@ -356,34 +494,32 @@ EpdRect epd_difference_image_base(const uint8_t *to, const uint8_t *from, .height = max(max_y - min_y + 1, 0), }; - free(dirty_cols); return crop_rect; } -EpdRect epd_difference_image(const uint8_t *to, const uint8_t *from, - uint8_t *interlaced, bool *dirty_lines) { - uint8_t from_or = 0; - uint8_t from_and = 0; - return epd_difference_image_base(to, from, epd_full_screen(), epd_width(), - epd_height(), interlaced, dirty_lines, - &from_or, &from_and); +EpdRect epd_difference_image( + const uint8_t* to, + const uint8_t* from, + uint8_t* interlaced, + bool* dirty_lines, + uint8_t* col_dirtyness +) { + return epd_difference_image_base( + to, from, epd_full_screen(), epd_width(), epd_height(), interlaced, dirty_lines, + col_dirtyness + ); } -EpdRect epd_difference_image_cropped(const uint8_t *to, const uint8_t *from, - EpdRect crop_to, uint8_t *interlaced, - bool *dirty_lines, bool *previously_white, - bool *previously_black) { - - uint8_t from_or, from_and; - - EpdRect 
result = - epd_difference_image_base(to, from, crop_to, epd_width(), epd_height(), - interlaced, dirty_lines, &from_or, &from_and); - - if (previously_white != NULL) - *previously_white = (from_and == 0x0F); - if (previously_black != NULL) - *previously_black = (from_or == 0x00); +EpdRect epd_difference_image_cropped( + const uint8_t* to, + const uint8_t* from, + EpdRect crop_to, + uint8_t* interlaced, + bool* dirty_lines, + uint8_t* col_dirtyness +) { + EpdRect result = epd_difference_image_base( + to, from, crop_to, epd_width(), epd_height(), interlaced, dirty_lines, col_dirtyness + ); return result; } - diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 00000000..aeb46882 --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,3 @@ +idf_component_register(SRC_DIRS "." + INCLUDE_DIRS "." + REQUIRES unity epdiy) \ No newline at end of file diff --git a/test/test_diff.c b/test/test_diff.c new file mode 100644 index 00000000..e2a924f7 --- /dev/null +++ b/test/test_diff.c @@ -0,0 +1,184 @@ +#include +#include +#include +#include +#include +#include +#include +#include "esp_timer.h" + +#define DEFAULT_EXAMPLE_LEN 704 + +bool _epd_interlace_line( + const uint8_t* to, + const uint8_t* from, + uint8_t* interlaced, + uint8_t* col_dirtyness, + int fb_width +); + +static const uint8_t from_pattern[8] = {0xFF, 0xF0, 0x0F, 0x01, 0x55, 0xAA, 0xFF, 0x80}; +static const uint8_t to_pattern[8] = {0xFF, 0xFF, 0x0F, 0x10, 0xAA, 0x55, 0xFF, 0x00}; + +static const uint8_t expected_interlaced_pattern[16] = { + 0xFF, 0xFF, 0xF0, 0xFF, 0xFF, 0x00, 0x01, 0x10, 0xA5, 0xA5, 0x5A, 0x5A, 0xFF, 0xFF, 0x00, 0x08}; +static const uint8_t expected_col_dirtyness_pattern[8] = {0x00, 0x0F, 0x00, 0x11, + 0xFF, 0xFF, 0x00, 0x80}; + +typedef struct { + uint8_t* from; + uint8_t* to; + uint8_t* interlaced; + uint8_t* col_dirtyness; + uint8_t* expected_interlaced; + uint8_t* expected_col_dirtyness; +} DiffTestBuffers; + +/** + * (Re-)fill buffers with example data, clear result 
buffers. + */ +static void diff_test_buffers_fill(DiffTestBuffers* bufs, int example_len) { + // initialize test and check patterns + for (int i = 0; i < example_len / 8; i++) { + memcpy(bufs->from + (8 * i), from_pattern, 8); + memcpy(bufs->to + (8 * i), to_pattern, 8); + memcpy(bufs->expected_interlaced + (16 * i), expected_interlaced_pattern, 16); + memcpy(bufs->expected_col_dirtyness + (8 * i), expected_col_dirtyness_pattern, 8); + } + + memset(bufs->col_dirtyness, 0, example_len); + memset(bufs->interlaced, 0, example_len * 2); +} + +/** + * Allocates and populates buffers for diff tests. + */ +static void diff_test_buffers_init(DiffTestBuffers* bufs, int example_len) { + bufs->from = heap_caps_aligned_alloc(16, example_len, MALLOC_CAP_DEFAULT); + bufs->to = heap_caps_aligned_alloc(16, example_len, MALLOC_CAP_DEFAULT); + bufs->interlaced = heap_caps_aligned_alloc(16, 2 * example_len, MALLOC_CAP_DEFAULT); + bufs->col_dirtyness = heap_caps_aligned_alloc(16, example_len, MALLOC_CAP_DEFAULT); + bufs->expected_interlaced = malloc(2 * example_len); + bufs->expected_col_dirtyness = malloc(example_len); + + diff_test_buffers_fill(bufs, example_len); +} + +/** + * Free buffers used for diff testing. 
+ */ +static void diff_test_buffers_free(DiffTestBuffers* bufs) { + heap_caps_free(bufs->from); + heap_caps_free(bufs->to); + heap_caps_free(bufs->interlaced); + heap_caps_free(bufs->col_dirtyness); + free(bufs->expected_interlaced); + free(bufs->expected_col_dirtyness); +} + +TEST_CASE("simple aligned diff works", "[epdiy,unit]") { + // length of the example buffers in bytes (i.e., half the length in pixels) + const int example_len = DEFAULT_EXAMPLE_LEN; + DiffTestBuffers bufs; + bool dirty; + + diff_test_buffers_init(&bufs, example_len); + + // This should trigger use of vector extensions on the S3 + TEST_ASSERT((uint32_t)bufs.to % 16 == 0) + + // fully aligned + dirty = _epd_interlace_line( + bufs.to, bufs.from, bufs.interlaced, bufs.col_dirtyness, 2 * example_len + ); + + TEST_ASSERT(dirty == true); + TEST_ASSERT_EQUAL_UINT8_ARRAY(bufs.expected_col_dirtyness, bufs.col_dirtyness, example_len); + TEST_ASSERT_EQUAL_UINT8_ARRAY(bufs.expected_interlaced, bufs.interlaced, 2 * example_len); + + diff_test_buffers_free(&bufs); +} + +TEST_CASE("dirtynes for diff without changes is correct", "[epdiy,unit]") { + const int example_len = DEFAULT_EXAMPLE_LEN; + const uint8_t NULL_ARRAY[DEFAULT_EXAMPLE_LEN * 2] = {0}; + DiffTestBuffers bufs; + bool dirty; + + diff_test_buffers_init(&bufs, example_len); + + // This should trigger use of vector extensions on the S3 + TEST_ASSERT((uint32_t)bufs.to % 16 == 0) + + // both use "from" buffer + dirty = _epd_interlace_line( + bufs.from, bufs.from, bufs.interlaced, bufs.col_dirtyness, 2 * example_len + ); + + TEST_ASSERT(dirty == false); + TEST_ASSERT_EQUAL_UINT8_ARRAY(NULL_ARRAY, bufs.col_dirtyness, example_len); + + // both use "to" buffer, misaligned by 4 bytes + dirty = _epd_interlace_line( + bufs.to + 4, bufs.to + 4, bufs.interlaced, bufs.col_dirtyness, 2 * (example_len - 4) + ); + + TEST_ASSERT(dirty == false); + TEST_ASSERT_EQUAL_UINT8_ARRAY(NULL_ARRAY, bufs.col_dirtyness + 4, example_len - 4); + + diff_test_buffers_free(&bufs); 
+} + +TEST_CASE("different 4-byte alignments work", "[epdiy,unit]") { + const int example_len = DEFAULT_EXAMPLE_LEN; + const uint8_t NULL_ARRAY[DEFAULT_EXAMPLE_LEN * 2] = {0}; + DiffTestBuffers bufs; + bool dirty; + + diff_test_buffers_init(&bufs, example_len); + + // test all combinations of start / end missalignment + for (int start_offset = 0; start_offset <= 16; start_offset += 4) { + for (int end_offset = 0; end_offset <= 16; end_offset += 4) { + int unaligned_len = example_len - end_offset - start_offset; + + diff_test_buffers_fill(&bufs, example_len); + + // before and after the designated range the buffer shoulld be clear + memset(bufs.expected_col_dirtyness, 0, start_offset); + memset(bufs.expected_interlaced, 0, 2 * start_offset); + memset(bufs.expected_col_dirtyness + start_offset + unaligned_len, 0, end_offset); + memset( + bufs.expected_interlaced + (start_offset + unaligned_len) * 2, 0, end_offset * 2 + ); + + printf( + "testing with alignment (in px): (%d, %d)... ", 2 * start_offset, 2 * unaligned_len + ); + uint64_t start = esp_timer_get_time(); + + for (int i = 0; i < 100; i++) { + dirty = _epd_interlace_line( + bufs.to + start_offset, bufs.from + start_offset, + bufs.interlaced + 2 * start_offset, bufs.col_dirtyness + start_offset, + 2 * unaligned_len + ); + } + + uint64_t end = esp_timer_get_time(); + + printf("took %.2fus per iter.\n", (end - start) / 100.0); + + + + TEST_ASSERT(dirty == true); + + TEST_ASSERT_EQUAL_UINT8_ARRAY( + bufs.expected_col_dirtyness, bufs.col_dirtyness, example_len + ); + TEST_ASSERT_EQUAL_UINT8_ARRAY(bufs.expected_interlaced, bufs.interlaced, example_len); + } + } + + diff_test_buffers_free(&bufs); +} \ No newline at end of file diff --git a/test/test_initialization.c b/test/test_initialization.c new file mode 100644 index 00000000..4faecdeb --- /dev/null +++ b/test/test_initialization.c @@ -0,0 +1,45 @@ +#include +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" + +#include "epd_board.h" +#include "epdiy.h" 
+#include "epd_display.h" + + +// choose the default demo board depending on the architecture +#ifdef CONFIG_IDF_TARGET_ESP32 +#define TEST_BOARD epd_board_v6 +#elif defined(CONFIG_IDF_TARGET_ESP32S3) +#define TEST_BOARD epd_board_v7 +#endif + +TEST_CASE("initialization and deinitialization works", "[epdiy,e2e]") +{ + epd_init(&TEST_BOARD, &ED097TC2, EPD_OPTIONS_DEFAULT); + + epd_poweron(); + vTaskDelay(2); + epd_poweroff(); + + epd_deinit(); +} + +TEST_CASE("re-initialization works", "[epdiy,e2e]") +{ + epd_init(&TEST_BOARD, &ED097TC2, EPD_OPTIONS_DEFAULT); + + epd_poweron(); + vTaskDelay(2); + epd_poweroff(); + + epd_deinit(); + + epd_init(&TEST_BOARD, &ED097TC2, EPD_OPTIONS_DEFAULT); + + epd_poweron(); + vTaskDelay(2); + epd_poweroff(); + + epd_deinit(); +} \ No newline at end of file diff --git a/test/test_line_mask.c b/test/test_line_mask.c new file mode 100644 index 00000000..9e7ec395 --- /dev/null +++ b/test/test_line_mask.c @@ -0,0 +1,27 @@ +#include +#include +#include +#include + + +void _epd_populate_line_mask(uint8_t* line_mask, const uint8_t* dirty_columns, int mask_len); + +const uint8_t col_dirtyness_example[8] = {0x00, 0x0F, 0x00, 0x11, 0xFF, 0xFF, 0x00, 0x80}; + + +TEST_CASE("mask populated correctly", "[epdiy,unit]") { + const uint8_t expected_mask[8] = {0x30, 0xF0, 0xFF, 0xC0, 0x00, 0x00, 0x00, 0x00}; + uint8_t mask[8] = {0}; + _epd_populate_line_mask(mask, col_dirtyness_example, 4); + + TEST_ASSERT_EQUAL_UINT8_ARRAY(expected_mask, mask, 8); +} + +TEST_CASE("neutral mask with null dirtyness", "[epdiy,unit]") { + const uint8_t expected_mask[8] = {0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}; + uint8_t mask[8] = {0}; + + _epd_populate_line_mask(mask, NULL, 4); + + TEST_ASSERT_EQUAL_UINT8_ARRAY(expected_mask, mask, 8); +} diff --git a/test/test_lut.c b/test/test_lut.c new file mode 100644 index 00000000..a3d8587d --- /dev/null +++ b/test/test_lut.c @@ -0,0 +1,153 @@ +#include +#include +#include +#include +#include +#include +#include +#include 
"epd_internals.h" +#include "epdiy.h" +#include "esp_attr.h" +#include "esp_timer.h" +#include "output_common/lut.h" + + +#define DEFAULT_EXAMPLE_LEN 1408 + +static const uint8_t input_data_pattern[16] = { + 0xFF, 0xFF, 0xF0, 0xFF, 0xFF, 0x00, 0x01, 0x10, 0xA5, 0xA5, 0x5A, 0x5A, 0xFF, 0xFF, 0x00, 0x08}; +static const uint8_t result_data_pattern_lcd[4] = {0x20, 0x90, 0x5A, 0x40}; + + +typedef void (*lut_func_t)(const uint32_t *, uint8_t *, const uint8_t *, uint32_t); +static uint8_t waveform_phases[16][4]; + + +static EpdWaveformPhases test_waveform = { + .phase_times = NULL, + .phases = 1, + .luts = (uint8_t*)waveform_phases, +}; + +typedef struct { + uint32_t* line_data; + uint8_t* result_line; + uint8_t* expected_line; + uint8_t* lut; + int example_len_px; +} LutTestBuffers; + + +static void fill_test_waveform() { + for (int to=0; to<16; to++) { + + memset(waveform_phases[to], 0, 4); + + for (int from=0; from<16; from++) { + uint8_t val = 0x00; + if (to < from) val = 0x01; + if (to > from) val = 0x02; + waveform_phases[to][from >> 2] |= val << (3 - (from & 0x3)) * 2; + } + } +} + +/** + * (Re-)fill buffers with example data, clear result buffers. + */ +static void lut_test_buffers_fill(LutTestBuffers* bufs) { + // initialize test and check patterns + for (int i = 0; i < bufs->example_len_px / 16; i++) { + memcpy(bufs->line_data + 4 * i, input_data_pattern, 16); + memcpy(bufs->expected_line + 4 * i, result_data_pattern_lcd, 4); + } + + memset(bufs->lut, 0, 1 << 16); + memset(bufs->result_line, 0, bufs->example_len_px / 4); + + fill_test_waveform(); +} + +/* + * Allocates and populates buffers for LUT tests. 
+ */ +static void lut_test_buffers_init(LutTestBuffers* bufs, int example_len_px) { + bufs->line_data = heap_caps_aligned_alloc(16, example_len_px, MALLOC_CAP_DEFAULT); + bufs->result_line = heap_caps_aligned_alloc(16, example_len_px / 4, MALLOC_CAP_DEFAULT); + bufs->expected_line = heap_caps_aligned_alloc(16, example_len_px / 4, MALLOC_CAP_DEFAULT); + bufs->lut = heap_caps_aligned_alloc(16, 1 << 16, MALLOC_CAP_DEFAULT); + bufs->example_len_px = example_len_px; + + lut_test_buffers_fill(bufs); +} + +/** + * Free buffers used for LUT testing. + */ +static void diff_test_buffers_free(LutTestBuffers* bufs) { + heap_caps_free(bufs->line_data); + heap_caps_free(bufs->expected_line); + heap_caps_free(bufs->result_line); + heap_caps_free(bufs->lut); +} + +static void IRAM_ATTR test_with_alignments(LutTestBuffers* bufs, lut_func_t lut_func) { + int len = bufs->example_len_px; + int out_len = bufs->example_len_px / 4; + + uint8_t* expectation_backup = heap_caps_aligned_alloc(16, out_len, MALLOC_CAP_DEFAULT); + memcpy(expectation_backup, bufs->expected_line, out_len); + + // test combinations of start / end missalignment in four byte steps + for (int start_offset = 0; start_offset <= 16; start_offset += 4) { + for (int end_offset = 0; end_offset <= 16; end_offset += 4) { + int unaligned_len = len - end_offset - start_offset; + + memset(bufs->result_line, 0, out_len); + memcpy(bufs->expected_line, expectation_backup, out_len); + + // before and after the designated range the buffer shoulld be clear + memset(bufs->expected_line, 0, start_offset / 4); + memset(bufs->expected_line + (start_offset + unaligned_len) / 4, 0, end_offset / 4); + + printf("testing with alignment (in px): (%d, %d)... 
", start_offset, unaligned_len); + + uint64_t start = esp_timer_get_time(); + for (int i=0; i < 100; i++) { + lut_func(bufs->line_data + start_offset / 4, bufs->result_line + start_offset / 4, bufs->lut, unaligned_len); + } + uint64_t end = esp_timer_get_time(); + + printf("took %.2fus per iter.\n", (end - start) / 100.0); + + // for (int i=0; i < out_len; i++) { + // printf("%X\n", bufs->result_line[i]); + // } + TEST_ASSERT_EQUAL_UINT8_ARRAY(bufs->expected_line, bufs->result_line, out_len); + } + } + + heap_caps_free(expectation_backup); +} + + +TEST_CASE("1ppB lookup LCD, 64k LUT", "[epdiy,unit]") { + LutTestBuffers bufs; + lut_test_buffers_init(&bufs, DEFAULT_EXAMPLE_LEN); + + enum EpdDrawMode mode = MODE_GL16 | MODE_PACKING_1PPB_DIFFERENCE | MODE_FORCE_NO_PIE; + TEST_ASSERT(calculate_lut(bufs.lut, 1 << 16, mode, 0, &test_waveform) == EPD_DRAW_SUCCESS); + test_with_alignments(&bufs, calc_epd_input_1ppB_64k); + + diff_test_buffers_free(&bufs); +} + +TEST_CASE("1ppB lookup LCD, 1k LUT, PIE", "[epdiy,unit]") { + LutTestBuffers bufs; + lut_test_buffers_init(&bufs, DEFAULT_EXAMPLE_LEN); + + enum EpdDrawMode mode = MODE_GL16 | MODE_PACKING_1PPB_DIFFERENCE; + TEST_ASSERT(calculate_lut(bufs.lut, 1 << 10, mode, 0, &test_waveform) == EPD_DRAW_SUCCESS); + test_with_alignments(&bufs, calc_epd_input_1ppB_1k_S3_VE); + diff_test_buffers_free(&bufs); +} \ No newline at end of file