diff --git a/src/modules/streamlit.py b/src/modules/streamlit.py
index 4ffeb18..4483592 100644
--- a/src/modules/streamlit.py
+++ b/src/modules/streamlit.py
@@ -178,6 +178,68 @@ def generate_top_scores(topic_sample: DataFrame, topic_name: str, position: int)
     return formatted_text_header
 
 
+def single_topic_formatting(
+    top_n_words: Series,
+    topic_sample: DataFrame,
+    topic_name: str,
+    topic_names: list,
+    stopwords: list,
+) -> list:
+    """Build streamlit annotation input for one topic's sample responses
+
+    Parameters
+    ----------
+    top_n_words:Series
+        top n words/phrases for the topic
+    topic_sample: DataFrame
+        sampled responses; only the "responses" column is read here
+    topic_name: str
+        label attached to every highlighted phrase
+    topic_names: list
+        all topic names, used to pick this topic's color
+    stopwords:list
+        stopwords used to build the word/stopword phrase combinations
+
+    Returns
+    -------
+    list
+        one list per response, mixing plain strings and annotation tuples
+    """
+    combos = create_word_stopword_combos(reindex_top_words(top_n_words), stopwords)
+    lookup = create_formatting_dictionary(
+        combos, topic_name, get_single_topic_color(topic_names, topic_name)
+    )
+
+    def annotate(response: str) -> list:
+        # mark phrases, split around the markers, then convert them to tuples
+        marked = insert_formatting_list(response, lookup, combos)
+        return insert_tuple(split_string_on_list(marked))
+
+    # each response is processed independently of the others
+    return list(topic_sample["responses"].apply(annotate))
+
+
+def get_single_topic_color(topic_names: list, topic_name: str) -> str:
+    """Return the hex color assigned to one topic
+
+    Parameters
+    ----------
+    topic_names:list
+        all topic names; a topic's position selects its palette entry
+    topic_name:str
+        the topic name to select a color for (must be in topic_names)
+
+    Returns
+    -------
+    str
+        hex code for the topic color"""
+    # get_hex_colors already returns hex codes, so no second .as_hex() call
+    topic_colors = get_hex_colors(len(topic_names))
+    # positions of topic_name; [0] raises IndexError when the name is absent
+    matches = [n for n, name in enumerate(topic_names) if name == topic_name]
+    return topic_colors[matches[0]]
+
+
 def get_hex_colors(n_colors: int) -> str:
     """Get the hex color codes for n_colors number of colors
 
@@ -193,29 +255,40 @@ def get_hex_colors(n_colors: int) -> str:
     return sns.color_palette(n_colors=n_colors).as_hex()
 
 
-def create_formatting_tuple(
-    dominant_topics: DataFrame, word: str, topic_color_dict: dict
-) -> tuple:
-    """create a formatting tuple for streamlit annotation
+def reindex_top_words(top_n_words: Series) -> Series:
+    """order top n words by the number of words in the phrase (most first) and
+    then by their original order of importance
 
     Parameters
     ----------
-    dominant_topics:DataFrame
-        dataframe of words and their strongest associated topic
-    word:str
-        word to create tuple for
-    topic_color_dict:dict
-        dictionary of topics and their assigned colors
+    top_n_words:Series
+        the top n number of words within a given topic, in importance order
 
     Returns
     -------
-    tuple
-        formatting tuple containing word, topic, and color
-    """
-    topic_x = dominant_topics.loc[word, "variable"]
-    topic_pretty = re.sub("_", " ", topic_x).capitalize()
-    topic_color = topic_color_dict[topic_pretty]
-    return (word, topic_pretty, topic_color)
+    Series
+        the reordered words, named "word", labelled by original position"""
+    # name the values "word" ourselves so unnamed Series work too
+    ranked = top_n_words.reset_index(drop=True).rename("word").reset_index()
+    ranked["n_words"] = ranked["word"].apply(count_words)
+    # more words first; "index" keeps importance order within a length
+    ranked = ranked.sort_values(["n_words", "index"], ascending=[False, True])
+    return ranked["word"]
+
+
+def count_words(phrase: str) -> int:
+    """Return how many whitespace-separated words a phrase contains
+
+    Parameters
+    ----------
+    phrase:str
+        the text to count words in
+
+    Returns
+    -------
+    int
+        number of tokens produced by splitting on runs of whitespace"""
+    return len(phrase.split())
 
 
 def create_word_stopword_combos(top_n_words: Series, stopwords: list) -> list:
@@ -245,106 +318,96 @@ def create_word_stopword_combos(top_n_words: Series, stopwords: list) -> list:
     return unnested_stopword_combo
 
 
-def insert_tuple(split_string: list) -> list:
-    """replace string with streamlit annotate formatting tuple
+def create_formatting_dictionary(
+    word_stopword_combos: list, topic_name: str, topic_color: str
+) -> dict:
+    """Map each snake_case phrase to the text of its formatting list
 
     Parameters
     ----------
-    split_string:list
-        list of strings which have been split at tuples
+    word_stopword_combos:list
+        phrases to build entries for (top words plus stopword variants)
+    topic_name:str
+        topic label written into every entry
+    topic_color:str
+        hex color code written into every entry
 
     Returns
     -------
-    list
-        list of strings and formatting tuples
-    """
-    for n, i in enumerate(split_string):
-        matcher = re.match(r"\['[\w\s]+',\s'\w+\s\d+',\s'#[a-zA-Z0-9]{6}'\]", i)
-        if matcher:
-            replacement_tuple = tuple(
-                re.sub(r"\[|\]|'", "", matcher.group(0)).split(", ")
-            )
-            split_string[n] = replacement_tuple
-    return split_string
+    dict
+        snake_case phrase -> "['phrase', 'topic_name', 'topic_color']" text"""
+    return {
+        snake_case(phrase): f"['{phrase}', '{topic_name}', '{topic_color}']"
+        for phrase in word_stopword_combos
+    }
 
 
-def add_label_formatting(replacement_dict: dict, topic_sample: DataFrame) -> list:
-    """add streamlit annotate label formatting within string
+def insert_formatting_list(
+    string: str, replacement_dict: dict, word_stopword_combos: list
+) -> str:
+    """replace each known phrase in a string with its formatting list text
 
     Parameters
     ----------
+    string:str
+        the string to replace values within
     replacement_dict:dict
-        dictionary of values to replace with their tuple replacements
-    topic_sample: DataFrame
-        sample of responses ordered by a particular topic
+        lookup of snake_case phrase to replacement formatting text
+    word_stopword_combos:list
+        phrases to look for, matched as literal text in list order
 
     Returns
     -------
-    list
-        list of strings and formatting tuples
+    str
+        string with every matched phrase swapped for its formatting text
     """
-    formatted_text = []
-    for sample in topic_sample["responses"]:
-        for key, value in replacement_dict.items():
-            sample = re.sub(rf"\s\b{key}\b", f" {value}", sample)
-        formatted_text.append([sample])
-    return formatted_text
+    for word in word_stopword_combos:  # pass 1: phrase -> snake_case key
+        string = re.sub(rf"\b{re.escape(word)}\b", snake_case(word), string)
+    for key, value in replacement_dict.items():  # pass 2: key -> formatting
+        string = re.sub(rf"(?<!')\b{re.escape(key)}\b(?!')", value, string)
+    return string
 
 
-def get_single_topic_color(topic_names: list, topic_name: str) -> str:
-    """get the topic color for a single topic
+def split_string_on_list(string: str) -> list:
+    """break a string apart around the formatting lists embedded in it
 
     Parameters
     ----------
-    topic_names:list
-        list of topic names
-    topic_name:str
-        the topic name to select a color for
+    string:str
+        text that may contain "['word', 'Topic 1', '#xxxxxx']" style markers
 
     Returns
     -------
-    str
-        hex code for the topic color"""
-    n_topics = len(topic_names)
-    topic_colors = get_hex_colors(n_topics).as_hex()
-    topic_number = [n for n, i in enumerate(topic_names) if i == topic_name]
-    topic_color = topic_colors[topic_number[0]]
-    return topic_color
+    list
+        the pieces of the string, with each formatting list as its own item"""
+    before_list = r"[\s,](?=\['[\w\s]+',\s'\w+\s\d+',\s'#[a-zA-Z0-9]{6}'\])"
+    after_list = r"(?<='#[a-zA-Z0-9]{6}'])[\s,]"
+    # one separator character is consumed on either side of a formatting list
+    separator = re.compile(f"{before_list}|{after_list}")
+    return separator.split(string)
 
 
-def single_topic_formatting(
-    top_n_words: Series, topic_sample: DataFrame, topic_name: str, topic_color: str
-) -> list:
-    """Creates a streamlit annotate formatting setup for single topic
+def insert_tuple(split_string: list) -> list:
+    """swap formatting list text for streamlit annotate tuples, in place
 
     Parameters
     ----------
-    top_n_words:Series
-        top n number of words with index numbers
-    topic_sample: DataFrame
-        sample of responses ordered by a particular topic
-    topic_name: str
-        name of the topic
-    topic_color: str
-        hex code for the topic
+    split_string:list
+        strings, some of which start with "['word', 'Topic 1', '#xxxxxx']"
 
     Returns
     -------
     list
-        a formatted list of strings and tuples
+        the same list object, with matching items replaced by tuples
     """
-    pattern_behind = r"[\s,](?=\['[\w\s]+',\s'\w+\s\d+',\s'#[a-zA-Z0-9]{6}'\])"
-    pattern_ahead = r"(?<='#[a-zA-Z0-9]{6}'])[\s]"
-    pattern_combined = "|".join([pattern_behind, pattern_ahead])
-    top_n_words_x = top_n_words
-    replacements = [[i, topic_name, topic_color] for i in list(top_n_words)]
-    replacement_dict = dict(zip(top_n_words_x, replacements))
-    initial_formatted = add_label_formatting(replacement_dict, topic_sample)
-    for idx in range(len(initial_formatted)):
-        split_string = re.split(pattern_combined, initial_formatted[idx][0])
-        split_string = insert_tuple(split_string)
-        initial_formatted[idx] = split_string
-    return initial_formatted
+    marker = re.compile(r"\['[\w\s]+',\s'\w+\s\d+',\s'#[a-zA-Z0-9]{6}'\]")
+    for position, piece in enumerate(split_string):
+        found = marker.match(piece)
+        if found is None:
+            continue
+        stripped = re.sub(r"\[|\]|'", "", found.group(0))
+        split_string[position] = tuple(stripped.split(", "))
+    return split_string
 
 
 def multitopic_formatting(
@@ -384,3 +447,28 @@ def multitopic_formatting(
                 formatted_response.append(word + " ")
         formatted_text.append(formatted_response)
     return formatted_text
+
+
+def create_formatting_tuple(
+    dominant_topics: DataFrame, word: str, topic_color_dict: dict
+) -> tuple:
+    """Build the (word, topic, color) annotation tuple for a single word
+
+    Parameters
+    ----------
+    dominant_topics:DataFrame
+        indexed by word, with the word's topic in the "variable" column
+    word:str
+        word to create tuple for
+    topic_color_dict:dict
+        colors keyed by prettified topic name, e.g. "Topic 1"
+
+    Returns
+    -------
+    tuple
+        the word, its prettified topic name, and that topic's color
+    """
+    raw_topic = dominant_topics.loc[word, "variable"]
+    # "topic_1" style names become "Topic 1" to match the color dictionary keys
+    label = re.sub("_", " ", raw_topic).capitalize()
+    return (word, label, topic_color_dict[label])
diff --git a/streamlit_app.py b/streamlit_app.py
index fe8f9cf..ae9f2d4 100644
--- a/streamlit_app.py
+++ b/streamlit_app.py
@@ -321,7 +321,7 @@
     word_stopword_combos = stream.create_word_stopword_combos(top_n_words, stopwords)
     topic_color = stream.get_single_topic_color(topic_names, topic_name)
     formatted_topic_single = stream.single_topic_formatting(
-        word_stopword_combos, topic_sample, topic_name, topic_color
+        top_n_words, topic_sample, topic_name, topic_names, stopwords
     )
     formatted_text = stream.multitopic_formatting(
         dominant_topics, topic_sample, topic_names
diff --git a/tests/modules/test_streamlit.py b/tests/modules/test_streamlit.py
new file mode 100644
index 0000000..5caa498
--- /dev/null
+++ b/tests/modules/test_streamlit.py
@@ -0,0 +1,307 @@
+import re
+from importlib import reload
+
+from pandas import DataFrame, Series
+
+# from src.modules import preprocessing as prep
+from src.modules import streamlit as stream
+
+reload(stream)
+
+
+class TestGetNTopWords:
+    def test_get_n_top_words(self):
+        """the two highest-scoring Topic 1 words keep their original labels"""
+        scores = DataFrame(
+            {
+                "topic_1_word_importance": [0, 1, 2],
+                "topic_2_word_importance": [0, 0, 0],
+                "word": ["alpha", "bravo", "charlie"],
+            }
+        )
+        expected = Series(["bravo", "charlie"], index=[1, 2])
+        actual = stream.get_top_n_words(topic_words=scores, n=2, topic_name="Topic 1")
+        assert (actual == expected).all()
+
+
+class TestIdentifyDominantTopics:
+    def test_identify_dominant_topics(self):
+        test_df = DataFrame(
+            {
+                "word": ["alpha", "bravo", "charlie"],
+                "topic_1": [0, 1, 2],
+                "topic_2": [2, 3, 4],
+                "topic_3": [3, 2, 1],
+            }
+        )
+        actual = stream.identify_dominant_topics(
+            topic_words=test_df, topic_names_snake=["topic_1", "topic_2", "topic_3"]
+        )
+        expected = DataFrame(
+            {
+                "word": ["alpha", "bravo", "charlie"],
+                "variable": ["topic_3", "topic_2", "topic_2"],
+            }
+        )
+        # all(DataFrame) only iterates the column labels, so reduce the cells
+        assert (actual == expected).all().all()
+
+
+class TestSnakeCase:
+    def test_snake_case(self):
+        """spaces become underscores and capitals are lowered"""
+        converted = stream.snake_case("This string")
+        assert converted == "this_string"
+
+
+class TestGetNTopicSamples:
+    def test_get_n_topic_samples(self):
+        test_df = DataFrame(
+            {
+                "responses": ["hello word", "world hello", "hello hello"],
+                "topic_1": [0, 2, 1],
+            }
+        )
+        actual = stream.get_n_topic_samples(
+            text_with_topic_df=test_df, topic_name="Topic_1", n=2
+        )
+        expected = DataFrame(
+            {"responses": ["world hello", "hello hello"], "topic_1": [2, 1]}
+        )
+        assert (actual == expected).all().all()  # all(df) only iterates column labels
+
+
+class TestGetResponseNo:
+    def test_get_response_no(self):
+        """position 1 reports the "index" value of the second row"""
+        sample = DataFrame(
+            {
+                "responses": ["hello word", "world hello", "hello hello"],
+                "index": [455, 12, 11],
+            }
+        )
+        label = stream.get_response_no(topic_sample=sample, position=1)
+        assert label == "Response 12"
+
+
+class TestGenerateTopScores:
+    def test_generate_top_scores(self):
+        sample = DataFrame(
+            {
+                "responses": ["hello word", "world hello", "hello hello"],
+                "index": [53, 22, 12],
+                "topic_1": [0.1, 0.3, 0.01],
+                "topic_2": [0.12, 0.22, 0.32],
+            }
+        )
+        # row 1 holds 0.3 for topic_1 and 0.22 for topic_2
+        header = stream.generate_top_scores(
+            topic_sample=sample, topic_name="Topic 1", position=1
+        )
+        assert header == "(Topic 1; Score: 30.0%)   (Topic 2; Score: 22.0%)"
+
+
+class TestSingleTopicFormatting:
+    def test_single_topic_formatting(self):
+        top_words = Series(["hello world", "happy"], name="word")
+        sample = DataFrame(
+            {
+                "responses": [
+                    "hello world how are you",
+                    "world hello how am i",
+                    "I am so happy hello my world",
+                ],
+                "index": [53, 22, 12],
+                "topic_1": [0.1, 0.3, 0.01],
+                "topic_2": [0.12, 0.22, 0.32],
+            }
+        )
+        expected = [
+            [("hello world", "Topic 1", "#1f77b4"), "how are you"],
+            ["world hello how am i"],
+            [
+                "I am so",
+                ("happy", "Topic 1", "#1f77b4"),
+                ("hello my world", "Topic 1", "#1f77b4"),
+            ],
+        ]
+        actual = stream.single_topic_formatting(
+            top_n_words=top_words,
+            topic_sample=sample,
+            topic_name="Topic 1",
+            topic_names=["Topic 1", "Topic 2"],
+            stopwords=["my"],
+        )
+        assert actual == expected
+
+
+class TestGetSingleTopicColor:
+    def test_get_single_topic_color(self):
+        """different topics in the same list get different colors"""
+        names = ["Topic 1", "Topic 2"]
+        # look up both topics against the same list of names
+        first_color, second_color = (
+            stream.get_single_topic_color(topic_names=names, topic_name=name)
+            for name in names
+        )
+        assert first_color != second_color
+
+
+class TestGetHexColors:
+    def test_get_hex_colors_is_hex(self):
+        color = stream.get_hex_colors(n_colors=1)[0]
+        assert re.match(r"#[a-zA-Z0-9]{6}", color), "does not match hex pattern"
+
+    def test_get_hex_colors_n_returns(self):
+        # the palette length follows the requested number of colors
+        for n_colors in (4, 2):
+            palette = stream.get_hex_colors(n_colors=n_colors)
+            assert len(palette) == n_colors
+
+
+class TestReindexTopWords:
+    def test_reindex_top_words(self):
+        # the two-word phrase moves first; single words keep their order
+        top_words = Series(["hoppy", "hello world", "happy"], name="word")
+        expected = Series(
+            ["hello world", "hoppy", "happy"], index=[1, 0, 2], name="word"
+        )
+        assert (stream.reindex_top_words(top_words) == expected).all()
+
+
+class TestCountWords:
+    def test_count_words(self):
+        for phrase, n_words in (("hello world", 2), ("hello", 1)):
+            assert stream.count_words(phrase) == n_words
+
+
+class TestCreateWordStopWordCombos:
+    def test_create_word_stopword_combo(self):
+        # each stopword is tried between the words of a multi-word phrase
+        words = Series(["hello world", "hello"], index=[21, 42])
+        combos = stream.create_word_stopword_combos(
+            top_n_words=words, stopwords=["he", "her"]
+        )
+        expected = ["hello he world", "hello her world", "hello world", "hello"]
+        assert combos == expected
+
+
+class TestCreateFormattingDictionary:
+    def test_create_formatting_dictionary(self):
+        # keys are snake_case phrases, values the formatting list as text
+        expected = {
+            "hello_my_world": "['hello my world', 'Topic 1', '#000000']",
+            "hello_world": "['hello world', 'Topic 1', '#000000']",
+        }
+        lookup = stream.create_formatting_dictionary(
+            word_stopword_combos=["hello my world", "hello world"],
+            topic_name="Topic 1",
+            topic_color="#000000",
+        )
+        assert lookup == expected
+
+
+class TestInsertFormattingList:
+    def test_insert_formatting_list(self):
+        """only the matched phrase is replaced by its formatting text"""
+        formatting = "['hello my world', 'Topic 1', '#000000']"
+        sentence = "hello my world, how are you this glorious day"
+        lookup = {
+            "hello_my_world": formatting,
+        }
+        actual = stream.insert_formatting_list(
+            string=sentence,
+            replacement_dict=lookup,
+            word_stopword_combos=["hello my world"],
+        )
+        # everything after the phrase is left exactly as it was
+        expected = formatting + ", how are you this glorious day"
+        assert actual == expected
+
+
+class TestSplitStringOnList:
+    def test_split_string_on_list(self):
+        # one separator on each side of the formatting list is dropped
+        marked = "hello ['world', 'Topic 1', '#000000'], how are you"
+        pieces = ["hello", "['world', 'Topic 1', '#000000']", " how are you"]
+        assert stream.split_string_on_list(marked) == pieces
+
+
+class TestInsertTuple:
+    def test_insert_tuple(self):
+        # plain strings pass through; the formatting text becomes a tuple
+        pieces = ["hello", "['world', 'Topic 1', '#000000']", " how are you"]
+        expected = ["hello", ("world", "Topic 1", "#000000"), " how are you"]
+        assert stream.insert_tuple(pieces) == expected
+
+
+class TestMultitopicFormatting:
+    def test_dominant_topics(self):
+        dominant = DataFrame(
+            {"word": ["hello", "world"], "variable": ["topic_1", "topic_2"]}
+        )
+        sample = DataFrame(
+            {
+                "index": [23, 25, 29],
+                "responses": [
+                    "hello world how are you",
+                    "hello my world how are you",
+                    "poppy flowers on sunday in the world",
+                ],
+                "topic_1": [0.1, 0.4, 0.8],
+                "topic_2": [1.0, 0.6, 0.4],
+            }
+        )
+        wanted = [
+            [
+                ("hello", "Topic 1", "#1f77b4"),
+                " ",
+                ("world", "Topic 2", "#ff7f0e"),
+                " ",
+                "how ",
+                "are ",
+                "you ",
+            ],
+            [
+                ("hello", "Topic 1", "#1f77b4"),
+                " ",
+                "my ",
+                ("world", "Topic 2", "#ff7f0e"),
+                " ",
+                "how ",
+                "are ",
+                "you ",
+            ],
+            [
+                "poppy ",
+                "flowers ",
+                "on ",
+                "sunday ",
+                "in ",
+                "the ",
+                ("world", "Topic 2", "#ff7f0e"),
+                " ",
+            ],
+        ]
+        formatted = stream.multitopic_formatting(
+            dominant_topics=dominant,
+            topic_sample=sample,
+            topic_names=["Topic 1", "Topic 2"],
+        )
+        assert formatted == wanted
+
+
+class TestCreateFormattingTuple:
+    def test_create_formatting_tuple(self):
+        """the word's snake_case topic is prettified and given its color"""
+        colors = {"Topic 1": "#000000", "Topic 2": "#999999"}
+        dominant = DataFrame(
+            {"variable": ["topic_1", "topic_2"]}, index=["hello", "world"]
+        )
+        actual = stream.create_formatting_tuple(
+            dominant_topics=dominant,
+            word="hello",
+            topic_color_dict=colors,
+        )
+        # "topic_1" is looked up in the color dictionary as "Topic 1"
+        assert actual == ("hello", "Topic 1", "#000000")