diff --git a/experimental/kernels/tanh/CMakeLists.txt b/experimental/kernels/tanh/CMakeLists.txt
new file mode 100644
index 0000000..abe75fb
--- /dev/null
+++ b/experimental/kernels/tanh/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Build script for the standalone `tanh` kernel example.
+#
+# Locates gpu.h (next to this file, or at the repository root) and then
+# includes cmake/example.cmake from whichever directory contained the header.
+cmake_minimum_required(VERSION 3.28)
+project(tanh)
+
+set(FILENAME "gpu.h")
+
+# This directory is <root>/experimental/kernels/tanh, so the repository root
+# is three levels up (the Makefile beside this file uses the same ../../..).
+get_filename_component(PROJECT_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../../.." ABSOLUTE)
+
+# Construct potential paths
+set(FILEPATH_CURRENT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}")
+set(FILEPATH_PROJECT_ROOT "${PROJECT_ROOT}/${FILENAME}")
+
+# Prefer a copy of the header in the current directory; otherwise fall back
+# to the repository root. Paths are quoted so that directories containing
+# spaces or semicolons are passed to EXISTS as a single argument.
+if(EXISTS "${FILEPATH_CURRENT_DIR}")
+    set(TARGET_FILE_PATH "${CMAKE_CURRENT_SOURCE_DIR}")
+elseif(EXISTS "${FILEPATH_PROJECT_ROOT}")
+    set(TARGET_FILE_PATH "${PROJECT_ROOT}")
+else()
+    message(FATAL_ERROR "File ${FILENAME} not found in either ${CMAKE_CURRENT_SOURCE_DIR} or ${PROJECT_ROOT}")
+endif()
+
+include("${TARGET_FILE_PATH}/cmake/example.cmake")
diff --git a/experimental/kernels/tanh/Makefile b/experimental/kernels/tanh/Makefile
new file mode 100644
index 0000000..551d663
--- /dev/null
+++ b/experimental/kernels/tanh/Makefile
@@ -0,0 +1,45 @@
+# Builds and runs the tanh kernel example against the Dawn shared library.
+#
+# Targets:
+#   run          build the example if needed, then execute it (default goal)
+#   dawnlib      run setup.py in GPUCPP when no libdawn is present in LIBDIR
+#   clean        delete the contents of build/ after a confirmation prompt
+#   check-python fail early when python3 is not on PATH
+CXX=clang++
+# CURDIR is set by make itself, so the default also holds under `make -C`.
+GPUCPP ?= $(CURDIR)/../../..
+LIBDIR ?= $(GPUCPP)/third_party/lib
+LIBSPEC ?= . $(GPUCPP)/source
+NUM_JOBS?=$(shell nproc)
+TARGET=tanh
+# Probe whether the compiler can preprocess <array> with its default standard
+# library; if the probe exits non-zero, fall back to libc++.
+ifeq ($(shell $(CXX) -std=c++17 -x c++ -E -include array - < /dev/null > /dev/null 2>&1 ; echo $$?),0)
+    STDLIB :=
+else
+    STDLIB := -stdlib=libc++
+endif
+FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(LIBDIR) run.cpp -ldl -ldawn
+
+.PHONY: run dawnlib run_setup clean check-python
+
+run: ./build/$(TARGET) dawnlib
+	$(LIBSPEC) && ./build/$(TARGET)
+
+dawnlib: $(if $(wildcard $(LIBDIR)/libdawn.so $(LIBDIR)/libdawn.dylib),,run_setup)
+
+run_setup: check-python
+	cd $(GPUCPP) && python3 setup.py
+
+# Linking needs libdawn, so dawnlib is an order-only prerequisite: it is made
+# first but never forces a rebuild of the binary.
+build/$(TARGET): run.cpp | dawnlib
+	mkdir -p build && $(CXX) $(FLAGS) -DNDEBUG -o ./build/$(TARGET)
+
+# printf + read instead of `read -p`, which is a bash extension and fails when
+# make runs recipes with a POSIX /bin/sh such as dash.
+clean:
+	printf '%s' "This will delete the contents of build/*. Are you sure? [CTRL-C to abort] " && read -r response && rm -rf build/*
+
+check-python:
+	@command -v python3 >/dev/null 2>&1 || { echo >&2 "Python needs to be installed and in your path."; exit 1; }
diff --git a/experimental/kernels/tanh/run.cpp b/experimental/kernels/tanh/run.cpp
new file mode 100644
index 0000000..7cda23c
--- /dev/null
+++ b/experimental/kernels/tanh/run.cpp
@@ -0,0 +1,61 @@
+#include "gpu.h"
+#include <array>
+#include <cstdio>
+#include <future>
+
+using namespace gpu; // createContext, createTensor, createKernel,
+                     // createShader, dispatchKernel, wait, toCPU
+                     // Tensor, Kernel, Context, Shape, kf32
+
+// WGSL compute shader: invocation i writes tan(inp[i]) to out[i]; indices at or
+// beyond arrayLength(&inp) are skipped. The {{precision}} and
+// {{workgroupSize}} placeholders are presumably substituted by gpu.h from the
+// values passed with this string to createKernel -- verify against gpu.h.
+// NOTE(review): this example lives in a directory named "tanh" but the shader
+// calls tan(); confirm which function is intended.
+static const char *kTan = R"(
+@group(0) @binding(0) var<storage, read_write> inp: array<{{precision}}>;
+@group(0) @binding(1) var<storage, read_write> out: array<{{precision}}>;
+@compute @workgroup_size({{workgroupSize}})
+fn main(
+    @builtin(global_invocation_id) GlobalInvocationID: vec3<u32>) {
+    let i: u32 = GlobalInvocationID.x;
+    if (i < arrayLength(&inp)) {
+        let x = inp[i];
+        out[i] = tan(x);
+    }
+}
+)";
+
+// Runs kTan over N sample inputs on the GPU, copies the results back to the
+// host and prints the first 1000 of them.
+int main(int argc, char **argv) {
+  printf("\033[2J\033[1;1H");
+  printf("\nHello gpu.cpp!\n");
+  printf("--------------\n\n");
+
+  Context ctx = createContext();
+  static constexpr size_t N = 100000;
+  // Static storage: two 100000-float arrays are roughly 800 KB, which is too
+  // close to common default stack limits to keep as automatic variables.
+  static std::array<float, N> inputArr, outputArr;
+  for (size_t i = 0; i < N; ++i) { // size_t index matches the type of N
+    inputArr[i] = static_cast<float>(i) / 10.0f; // dummy input data
+  }
+  Tensor input = createTensor(ctx, Shape{N}, kf32, inputArr.data());
+  Tensor output = createTensor(ctx, Shape{N}, kf32);
+  std::promise<void> promise;
+  std::future<void> future = promise.get_future();
+  Kernel op = createKernel(ctx, {kTan, 256, kf32},
+                           Bindings{input, output},
+                           /* nWorkgroups */ {cdiv(N, 256), 1, 1});
+  dispatchKernel(ctx, op, promise);
+  wait(ctx, future);
+  toCPU(ctx, output, outputArr.data(), sizeof(outputArr));
+  for (int i = 0; i < 1000; ++i) {
+    printf("  tan(%.2f) = %.10f\n", inputArr[i], outputArr[i]);
+  }
+  printf("  ...\n\n");
+  printf("Computed %zu values of tan(x)\n\n", N);
+  return 0;
+}