From 1ebc3636346a01f6c5709099d34c57f2cb00d7ea Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 8 Aug 2024 11:27:54 +0200
Subject: [PATCH 001/152] initial commit

---
 .../HPC chatbot preprocessor/.idea/.gitignore |   8 +
 .../.idea/HPC chatbot preprocessor.iml        |  10 +
 .../inspectionProfiles/Project_Default.xml    |  25 ++
 .../inspectionProfiles/profiles_settings.xml  |   6 +
 .../HPC chatbot preprocessor/.idea/misc.xml   |   7 +
 .../.idea/modules.xml                         |   8 +
 .../HPC chatbot preprocessor/.idea/vcs.xml    |   6 +
 scripts/HPC chatbot preprocessor/main.py      | 375 ++++++++++++++++++
 .../HPC chatbot preprocessor/start_checker.py |  17 +
 9 files changed, 462 insertions(+)
 create mode 100644 scripts/HPC chatbot preprocessor/.idea/.gitignore
 create mode 100644 scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml
 create mode 100644 scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml
 create mode 100644 scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml
 create mode 100644 scripts/HPC chatbot preprocessor/.idea/misc.xml
 create mode 100644 scripts/HPC chatbot preprocessor/.idea/modules.xml
 create mode 100644 scripts/HPC chatbot preprocessor/.idea/vcs.xml
 create mode 100644 scripts/HPC chatbot preprocessor/main.py
 create mode 100644 scripts/HPC chatbot preprocessor/start_checker.py
diff --git a/scripts/HPC chatbot preprocessor/.idea/.gitignore b/scripts/HPC chatbot preprocessor/.idea/.gitignore
new file mode 100644
index 00000000000..13566b81b01
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/.idea/.gitignore	
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml b/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml
new file mode 100644
index 00000000000..2c80e126949
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml	
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/.venv" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 00000000000..fc946d9cefc
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml	
@@ -0,0 +1,25 @@
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
+      <Languages>
+        <language minSize="205" name="Python" />
+      </Languages>
+    </inspection_tool>
+    <inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
+    <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
+      <option name="ignoredErrors">
+        <list>
+          <option value="E501" />
+        </list>
+      </option>
+    </inspection_tool>
+    <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
+      <option name="ignoredErrors">
+        <list>
+          <option value="N802" />
+        </list>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>
\ No newline at end of file
diff --git a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 00000000000..105ce2da2d6
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml	
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/scripts/HPC chatbot preprocessor/.idea/misc.xml b/scripts/HPC chatbot preprocessor/.idea/misc.xml
new file mode 100644
index 00000000000..54cda8fd6dd
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/.idea/misc.xml	
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.12 (HPC chatbot preprocessor)" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (HPC chatbot preprocessor)" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/scripts/HPC chatbot preprocessor/.idea/modules.xml b/scripts/HPC chatbot preprocessor/.idea/modules.xml
new file mode 100644
index 00000000000..58e027d745f
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/.idea/modules.xml	
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/HPC chatbot preprocessor.iml" filepath="$PROJECT_DIR$/.idea/HPC chatbot preprocessor.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/scripts/HPC chatbot preprocessor/.idea/vcs.xml b/scripts/HPC chatbot preprocessor/.idea/vcs.xml
new file mode 100644
index 00000000000..b2bdec2d71b
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/.idea/vcs.xml	
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py
new file mode 100644
index 00000000000..7f74fb12fb2
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/main.py	
@@ -0,0 +1,375 @@
+import os
+import re
+import shutil
+
+# test_number = int(input("Which test should be run?"))
+#
+# # Test for strip_markdown (somewhat successful, see findings file)
+#
+# if test_number == 1:
+#     import strip_markdown
+#
+#     strip_markdown.strip_markdown_file("C:\\HPC werk\\Chatbot\\md_to_plaintext_test.md")
+#
+# # Test if copy of document doesn't change original document (successful)
+# if test_number == 2:
+#     import shutil
+#
+#     shutil.copyfile("C:\\HPC_werk\\Chatbot\\md_to_plaintext_test.txt",
+#                     "C:\\HPC_werk\\Chatbot\\md_to_plaintext_test_copy.txt")
+#     with open("C:\\HPC_werk\\Chatbot\\md_to_plaintext_test_copy.txt", 'w') as file:
+#         file.write('hello')
+
+# Test with actual document
+
+# make a copy of one of the md files to test some things
+shutil.copyfile("C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC\\getting_started.md",
+                "C:\\HPC_werk\\Chatbot\\getting_started_copy.md")
+
+################### define global variables ###################
+# variable for the filename (which will be changed into something else in the final version)
+filename = "getting_started_copy.md"
+
+# variable for the main title (needed for reference links)
+main_title = filename[:-3]
+
+# variable that keeps track of the directories that are used to write in at different levels
+root_dir_generic = "C:\\HPC_werk\\Chatbot\\parsed_mds\\generic\\"
+root_dir_os_specific_linux = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\linux\\"
+root_dir_os_specific_windows = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\windows\\"
+root_dir_os_specific_macos = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\macos\\"
+curr_dirs = [filename[:-3] for i in range(4)]
+
+# variable to keep track whether we're dealing with OS-specific info or not
+OS_specific = False
+
+# pattern for the regex if-statement to filter out markdown titles
+if_pattern = r'^#+ '
+
+# variable that keeps track of the latest non-zero level title and corresponding directory
+last_title_level = 1
+last_title = None
+last_directory = None
+last_was_title = False
+
+# list to keep track of links in the text
+links_generic = []
+links_linux = []
+links_windows = []
+links_macos = []
+
+# dictionaries to keep track of current OS and location
+active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"}
+active_site_if_states = {"Gent": "inactive", "not-Gent": "inactive"}
+
+# variable to keep track of the type of if-statement
+if_type = "OS"
+
+# variable to keep track of the macro-replacements at the top of markdown files
+replacements = {}
+
+# variable that is used to detect whether the first title has been encountered yet
+after_first_title = False
+
+
+################### define functions ###################
+
+# function that removes the previous file structure before starting the process of making a new one
+def remove_directory_tree(old_directory):
+    if os.path.exists(old_directory):
+        shutil.rmtree(old_directory)
+
+
+# function that checks the first lines of a file until a title is found and saves the macro-replacements to the list
+def save_replacements(curr_line):
+    global replacements
+    match = re.search(r'\{% set (.*?)="(.*?)" %}', curr_line)
+    replacements[match.group(1)] = match.group(2)
+
+
+# function that checks whether the current line has a title of level 3 at maximum (returns the level of the title or 0 if the line is not a title)
+def check_for_title_logic(curr_line):
+    global curr_dirs
+    match = re.match(if_pattern, curr_line)
+    if match and len(match.group(0)) <= 4:
+        return len(match.group(0)) - 1
+    else:
+        return 0
+
+
+# function that resets the contents of the link_lists
+def reset_link_lists():
+    global links_generic, links_linux, links_windows, links_macos
+    links_generic = []
+    links_linux = []
+    links_windows = []
+    links_macos = []
+
+
+# function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables
+def check_for_title(curr_line):
+    global curr_dirs, last_title
+    logic_output = check_for_title_logic(curr_line)
+    if logic_output == 0:
+        return 0, None, None
+    else:
+        if last_title is not None:
+            write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic)
+            write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux)
+            write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", links_windows)
+            write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos)
+            reset_link_lists()
+
+        curr_dirs[logic_output] = curr_dirs[logic_output - 1] + "\\" + curr_line[logic_output + 1:-1].replace(' ', '-')
+
+        create_directory(root_dir_generic + curr_dirs[logic_output])
+        create_directory(root_dir_os_specific_linux + curr_dirs[logic_output])
+        create_directory(root_dir_os_specific_windows + curr_dirs[logic_output])
+        create_directory(root_dir_os_specific_macos + curr_dirs[logic_output])
+
+        update_lower_curr_dir(curr_dirs[logic_output], logic_output)
+        return logic_output, curr_line[logic_output + 1:-1].replace(' ', '-'), curr_dirs[logic_output]
+
+
+# function that creates directories if needed
+def create_directory(new_directory):
+    if not os.path.exists(new_directory):
+        os.mkdir(new_directory)
+
+
+# function that updates the curr_dir variables when needed
+def update_lower_curr_dir(curr_directory, level):
+    global curr_dirs
+    for i in range(level + 1, 4):
+        curr_dirs[i] = curr_directory
+
+
+# function that replaces certain markdown structures with the equivalent used on the website
+def replace_markdown_markers(curr_line, linklist):
+
+    # replace {{hpcinfra}}
+    curr_line = re.sub(r'\{\{\s*hpcinfra\s*}}', "HPC-UGent infrastructure", curr_line)
+
+    # replace other replacement macros
+    for macro in replacements.keys():
+        curr_line = re.sub(r'\{\{\s*' + re.escape(macro) + r'\s*}}', replacements[macro], curr_line)
+
+    # replace links with a reference
+    matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line)
+    if matches:
+        for match in matches:
+            print(f"[{match[0]}]({match[1]})")
+            curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + str(len(linklist) + 1) + "]")
+            linklist.append(match[1])
+
+    return curr_line, linklist
+
+
+# function that checks for if-statements
+def check_if_statements(curr_line):
+    global if_type
+
+    # check whether the first part of the line contains information wrt if-statements
+    match = re.search(r'^\{%-\s([^%]*)%}(.*)', curr_line)
+
+    # check whether the line contains information wrt if-statements that is not in its first part
+    match_large = re.search(r'^(.*)(\{%-\s[^%]*%})(.*)', curr_line)
+
+    if match:
+        print("################################################################################")
+        content = match.group(1)
+        print(content)
+
+        # new if-statement wrt OS
+        if re.match(r'if OS == ', content):
+            OS = content[9:-1]
+
+            # set new active OS
+            active_OS_if_states[OS] = "active"
+
+            # set other active ones on inactive
+            for other_OS in active_OS_if_states.keys():
+                if other_OS != OS and active_OS_if_states[other_OS] == "active":
+                    active_OS_if_states[other_OS] = "inactive"
+
+            if_type = "OS"
+
+        # new if-statement wrt site
+        elif re.match(r'if site == ', content):
+            if re.search(r'(?i)gent', content):
+                active_site_if_states["Gent"] = "active"
+                active_site_if_states["not-Gent"] = "inactive"
+            else:
+                active_site_if_states["not-Gent"] = "active"
+                if active_site_if_states["Gent"] == "active":
+                    active_site_if_states["Gent"] = "inactive"
+            if_type = "site"
+
+        # endif statement wrt OS
+        elif re.match(r'endif ', content) and if_type == "OS":
+            if str(1) in active_OS_if_states.values():
+                active_OS_if_states[list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active"
+            else:
+                for key in active_OS_if_states.keys():
+                    active_OS_if_states[key] = "inactive"
+
+        # endif statement wrt site
+        elif re.match(r'endif ', content) and if_type == "site":
+            for key in active_site_if_states.keys():
+                active_site_if_states[key] = "inactive"
+
+        # else statement wrt OS
+        elif re.match(r'else ', content) and if_type == "OS":
+
+            i = 0
+            for i in range(3):
+                if str(i) not in active_OS_if_states.values():
+                    break
+
+            # set the previously active one on inactive until the next endif
+            key_list = list(active_OS_if_states.keys())
+            position = list(active_OS_if_states.values()).index("active")
+            active_OS_if_states[key_list[position]] = str(i)
+
+            # set inactive ones on active
+            while "inactive" in active_OS_if_states.values():
+                position = list(active_OS_if_states.values()).index("inactive")
+                active_OS_if_states[key_list[position]] = "active"
+
+        # else statement wrt site
+        elif re.match(r'else ', content) and if_type == "site":
+
+            # change state of "Gent" and set not-Gent on active
+            if active_site_if_states["Gent"] == "inactive":
+                active_site_if_states["Gent"] = "active"
+            elif active_site_if_states["Gent"] == "active":
+                active_site_if_states["Gent"] = str(0)
+            active_site_if_states["not-Gent"] = "active"
+
+        print(active_OS_if_states)
+        print(active_site_if_states)
+
+        if len(match.group(2)) != 0:
+            extra_message = match.group(2).lstrip()
+            print(extra_message)
+            # check_if_statements(extra_message)
+            print("check_extra_message")
+            return "check_extra_message", extra_message, None
+
+        else:
+            print("done")
+            return "done", None, None
+
+    elif match_large:
+        print("################################################################################")
+        print(active_OS_if_states)
+        print(active_site_if_states)
+        print(match_large.group(1))
+        print(match_large.group(2))
+        print("write_text_and_check_extra_message")
+        return "write_text_and_check_extra_message", match_large.group(2), match_large.group(1)
+
+    else:
+        return "write_text", None, curr_line
+
+
+# function that writes a line to a file
+def write_text_to_file(file_name, curr_line):
+    global links_generic, links_linux, links_windows, links_macos
+    with open(file_name, "a") as write_file:
+        if "generic" in file_name:
+            curr_line, links_generic = replace_markdown_markers(curr_line, links_generic)
+        elif "linux" in file_name:
+            curr_line, links_linux = replace_markdown_markers(curr_line, links_linux)
+        elif "windows" in file_name:
+            curr_line, links_windows = replace_markdown_markers(curr_line, links_windows)
+        else:
+            curr_line, links_macos = replace_markdown_markers(curr_line, links_macos)
+        write_file.write(curr_line)
+
+
+# function that decides what file to write text to
+def choose_and_write_to_file(curr_line):
+    # check that the line is part of the website for gent
+    if active_site_if_states["Gent"] == "active" or active_site_if_states["Gent"] == "inactive" and active_site_if_states["not-Gent"] == "inactive":
+        if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive":
+            write_text_to_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", curr_line)
+        if active_OS_if_states["linux"] == "active":
+            write_text_to_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", curr_line)
+        if active_OS_if_states["windows"] == "active":
+            write_text_to_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", curr_line)
+        if active_OS_if_states["macos"] == "active":
+            write_text_to_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", curr_line)
+
+
+# function that adds a reference link at the end of every txt file
+def add_reference_link(file_location, reference_link):
+    with open(file_location, 'a') as write_file:
+        write_file.write("\nreference: " + reference_link + "\n")
+
+
+# function that adds the links that should be at the end of a file
+def write_end_of_file(file_location, OS, linklist):
+    if len(OS) > 0:
+        OS = OS + "/"
+
+    # add the links from within the document
+    with open(file_location, 'a') as write_file:
+        for i, link in enumerate(linklist):
+            write_file.write("[" + str(i + 1) + "]: " + str(link) + "\n")
+
+    # finally add the reference link
+    add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + main_title + "/#" + last_title.lower())
+
+
+################### actually parse the md file ###################
+
+# remove the old directories if needed
+remove_directory_tree(root_dir_generic)
+remove_directory_tree(root_dir_os_specific_linux)
+remove_directory_tree(root_dir_os_specific_windows)
+remove_directory_tree(root_dir_os_specific_macos)
+
+# create directories for the source markdown file
+create_directory(root_dir_generic)
+create_directory(root_dir_os_specific_linux)
+create_directory(root_dir_os_specific_windows)
+create_directory(root_dir_os_specific_macos)
+create_directory(root_dir_generic + curr_dirs[0])
+create_directory(root_dir_os_specific_linux + curr_dirs[0])
+create_directory(root_dir_os_specific_windows + curr_dirs[0])
+create_directory(root_dir_os_specific_macos + curr_dirs[0])
+
+# open the file and store line by line in the right file
+with open("C:\\HPC_werk\\Chatbot\\getting_started_copy.md", 'r') as readfile:
+
+    for line in readfile:
+        title_level, title, directory = check_for_title(line)
+
+        # line is a title with a maximum depth of 3
+        if title_level > 0:
+            last_title_level = title_level
+            last_title = title
+            last_directory = directory
+            after_first_title = True
+
+        # line is not a title
+        else:
+            if after_first_title:
+                # check for if-statements and write the appropriate lines in the right files
+                next_action = check_if_statements(line)
+                while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message":
+                    if next_action[0] == "write_text_and_check_extra_message":
+                        choose_and_write_to_file(next_action[2])
+                    next_action = check_if_statements(next_action[1])
+
+                if next_action[0] == "write_text":
+                    choose_and_write_to_file(next_action[2])
+            else:
+                save_replacements(line)
+
+# write end of file for the last file
+write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic)
+write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux)
+write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", links_windows)
+write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos)
diff --git a/scripts/HPC chatbot preprocessor/start_checker.py b/scripts/HPC chatbot preprocessor/start_checker.py
new file mode 100644
index 00000000000..50b61cd5213
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/start_checker.py	
@@ -0,0 +1,17 @@
+import os
+
+directory = "C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC"
+
+for dirpath, dirnames, filenames in os.walk(directory):
+    for filename in filenames:
+        # if filename.endswith("xdmod.md"):
+        #     break
+        if filename.endswith(".md"):
+            lines_until_title = 0
+            with open(directory + "\\" + filename, "r") as file:
+                for line in file:
+                    if line[0] == "#":
+                        break
+                    lines_until_title += 1
+            print(filename + " : " + str(lines_until_title))
+    break

From 10edb2050da2ffc7412c7123c190cb4991682daf Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 8 Aug 2024 17:09:18 +0200
Subject: [PATCH 002/152] some cleanup

---
 .../copies/getting_started_copy.md            | 268 ++++++++++++++++++
 .../Getting-Access/Getting-Access.txt         |  25 ++
 .../Getting-Connected/Getting-Connected.txt   |  19 ++
 .../Getting-Started/Getting-Started.txt       |  11 +
 .../Inspect-your-results.txt                  |  56 ++++
 .../Getting-Started/Next-steps/Next-steps.txt |  15 +
 .../Submitting-a-job/Submitting-a-job.txt     |  60 ++++
 .../Transfer-your-files.txt                   |  21 ++
 .../Wait-for-job-to-be-executed.txt           |  26 ++
 .../Getting-Access/Getting-Access.txt         |   2 +
 .../Getting-Connected/Getting-Connected.txt   |  18 ++
 .../Getting-Started/Getting-Started.txt       |   2 +
 .../Inspect-your-results.txt                  |   2 +
 .../Getting-Started/Next-steps/Next-steps.txt |   2 +
 .../Submitting-a-job/Submitting-a-job.txt     |   2 +
 .../Transfer-your-files.txt                   |  21 ++
 .../Wait-for-job-to-be-executed.txt           |   2 +
 .../Getting-Access/Getting-Access.txt         |   2 +
 .../Getting-Connected/Getting-Connected.txt   |  13 +
 .../Getting-Started/Getting-Started.txt       |   2 +
 .../Inspect-your-results.txt                  |   2 +
 .../Getting-Started/Next-steps/Next-steps.txt |   2 +
 .../Submitting-a-job/Submitting-a-job.txt     |   2 +
 .../Transfer-your-files.txt                   |  21 ++
 .../Wait-for-job-to-be-executed.txt           |   2 +
 .../Getting-Access/Getting-Access.txt         |   2 +
 .../Getting-Connected/Getting-Connected.txt   |  13 +
 .../Getting-Started/Getting-Started.txt       |   2 +
 .../Inspect-your-results.txt                  |   2 +
 .../Getting-Started/Next-steps/Next-steps.txt |   2 +
 .../Submitting-a-job/Submitting-a-job.txt     |   2 +
 .../Transfer-your-files.txt                   |  15 +
 .../Wait-for-job-to-be-executed.txt           |   2 +
 .../HPC chatbot preprocessor/jinja_parser.py  |   3 +
 scripts/HPC chatbot preprocessor/main.py      |  26 +-
 .../HPC chatbot preprocessor/start_checker.py |   2 +
 36 files changed, 662 insertions(+), 7 deletions(-)
 create mode 100644 scripts/HPC chatbot preprocessor/copies/getting_started_copy.md
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt
 create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt
 create mode 100644 scripts/HPC chatbot preprocessor/jinja_parser.py

diff --git a/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md b/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md
new file mode 100644
index 00000000000..8fe33ebc513
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md	
@@ -0,0 +1,268 @@
+{% set exampleloc="mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist" %}
+# Getting Started
+
+Welcome to the "Getting Started" guide. This chapter will lead you through the initial steps of logging into the {{hpcinfra}} and submitting your very first job. We'll also walk you through the process step by step using a practical example.
+
+In addition to this chapter, you might find the [recording of the *Introduction to HPC-UGent* training session](https://www.ugent.be/hpc/en/training/introhpcugent-recording) to be a useful resource.
+
+Before proceeding, read [the introduction to HPC](introduction.md) to gain an understanding of the {{ hpcinfra }} and related terminology.
+
+### Getting Access
+
+To get access to the {{hpcinfra}}, visit [Getting an HPC Account](account.md).
+
+If you have not used Linux before, 
+{%- if site == 'Gent' %}
+now would be a good time to follow our [Linux Tutorial](linux-tutorial/index.md).
+{%- else %}
+please learn some basics first before continuing. (see [Appendix C - Useful Linux Commands](useful_linux_commands.md))
+{%- endif %}
+
+#### A typical workflow looks like this:
+
+1.  Connect to the login nodes 
+2.  Transfer your files to the {{hpcinfra}}
+3.  Optional: compile your code and test it 
+4.  Create a job script and submit your job
+5.  Wait for job to be executed
+6.  Study the results generated by your jobs, either on the cluster or
+    after downloading them locally.
+
+We will walk through an illustrative workload to get you started. In this example, our objective is to train a deep learning model for recognizing hand-written digits (MNIST dataset) using [TensorFlow](https://www.tensorflow.org/);
+see the [example scripts](https://github.com/hpcugent/vsc_user_docs/tree/main/{{exampleloc}}).
+
+### Getting Connected
+
+There are two options to connect
+
+- Using a terminal to connect via SSH (for power users) (see [First Time connection to the {{ hpcinfra}}](connecting.md#first-time-connection-to-the-hpc-infrastructure))
+- [Using the web portal](web_portal.md)
+
+Considering your operating system is **{{OS}}**, 
+
+{%- if OS == linux %}
+it is recommended to make use of the `ssh` command in a terminal to get the most flexibility. 
+
+Assuming you have already generated SSH keys in the previous step ([Getting Access](#getting-access)), and that they are in a default location, you should now be able to login by running the following command:
+
+<pre><code>ssh {{userid}}@{{loginnode}}</code></pre>
+
+!!! Warning "Use your own VSC account id"
+    
+    Replace <b>{{userid}}</b> with your VSC account id (see <https://account.vscentrum.be>)
+
+!!! Tip
+
+    You can also still use the web portal (see [shell access on web portal](web_portal.md#shell-access))
+
+{%- else %}
+{%- if OS == windows %} it is recommended to use the web portal.
+{%- else %} it should be easy to make use of the `ssh` command in a terminal, but the web portal will work too. {%- endif %}
+
+The [web portal](web_portal.md) offers a convenient way to upload files and gain shell access to the {{hpcinfra}} from a standard web browser (no software installation or configuration required).
+
+See [shell access](web_portal.md#shell-access) when using the web portal, or
+[connection to the {{hpcinfra}}](connecting.md#first-time-connection-to-the-hpc-infrastructure) when using a terminal.
+
+Make sure you can get shell access to the {{hpcinfra}} before proceeding with the next steps.
+
+{%- endif %}
+
+!!! Info
+
+    When having problems see the [connection issues section on the troubleshooting page](troubleshooting.md#sec:connecting-issues).
+
+
+### Transfer your files
+
+Now that you can login, it is time to transfer files from your local computer to your **home directory** on the {{hpcinfra}}.
+
+Download [tensorflow_mnist.py](https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/tensorflow_mnist.py) 
+and [run.sh](https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/run.sh) example scripts to your computer (from [here](https://github.com/hpcugent/vsc_user_docs/tree/main/{{exampleloc}})).
+
+{%- if OS == windows %}
+
+The [HPC-UGent web portal](https://login.hpc.ugent.be) provides a file browser that allows uploading files.
+For more information see the [file browser section](web_portal.md#file-browser).
+
+Upload both files (`run.sh` and `tensorflow-mnist.py`) to your **home directory** and go back to your shell.
+
+!!! Info
+
+    As an alternative, you can use WinSCP (see [our section](connecting.md#winscp))
+
+{%- else %}
+
+On your local machine you can run:
+<pre><code>curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/tensorflow_mnist.py
+curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/run.sh
+</code></pre>
+
+Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC).
+<pre><code>scp tensorflow_mnist.py run.sh {{userid}}@{{ loginnode }}:~ </code></pre>
+<pre><code>ssh  {{userid}}@{{ loginnode }} </code></pre>
+
+!!! Warning "Use your own VSC account id"
+    
+    Replace <b>{{userid}}</b> with your VSC account id (see <https://account.vscentrum.be>)
+
+!!! Info
+
+    For more information about transferring files or `scp`, see [transfer files from/to hpc](connecting.md#transfer-files-tofrom-the-hpc).
+
+{%- endif %}
+
+When running `ls` in your session on the {{hpcinfra}}, you should see the two files listed in your home directory (`~`):
+
+```shell
+$ ls ~
+run.sh tensorflow_mnist.py
+```
+
+When you do not see these files, make sure you uploaded the files to your **home directory**.
+
+### Submitting a job
+
+Jobs are submitted and executed using job scripts. In our case **run.sh** can be used as a (very minimal) job script.
+
+A job script is a shell script, a text file that specifies the resources, 
+the software that is used (via `module load` statements), 
+and the steps that should be executed to run the calculation.
+
+Our job script looks like this:
+
+<center>-- run.sh --</center>
+
+```bash
+#!/bin/bash
+
+module load TensorFlow/2.11.0-foss-2022a
+
+python tensorflow_mnist.py
+
+```
+<sub>As you can see this job script will run the Python script named **tensorflow_mnist.py**.</sub>
+
+
+The jobs you submit are by default executed on **cluster/{{defaultcluster}}**; you can swap to another cluster by issuing the following command.
+
+```shell
+module swap cluster/{{othercluster}}
+```
+
+!!! Tip
+    
+    When submitting jobs with a limited amount of resources, it is recommended to use the [debug/interactive cluster](interactive_debug.md#interactive-and-debug-cluster): `donphan`. 
+
+{%- if site == 'Gent' %}
+
+    To get a list of all clusters and their hardware, see <https://www.ugent.be/hpc/en/infrastructure>.
+
+{%- endif %}
+
+This job script can now be submitted to the cluster's job system for execution, using the qsub (**q**ueue **sub**mit) command:
+
+```shell
+$ qsub run.sh
+{{jobid}}
+```
+
+This command returns a job identifier (*{{jobid}}*) on the HPC cluster. This is a unique identifier for the job which can be used to monitor and manage your job.
+
+!!! Warning "Make sure you understand what the `module` command does"
+ 
+    Note that the module commands only modify environment variables. For instance, running `module swap cluster/{{othercluster}}` will update your shell environment so that `qsub` submits a job to the `{{othercluster}}` cluster, 
+    but our active shell session is still running on the login node.
+    
+    It is important to understand that while `module` commands affect your session environment, they do ***not*** change where the commands you are running are being executed: they will still be run on the login node you are on.
+    
+    When you submit a job script however, the commands ***in*** the job script will be run on a workernode of the cluster the job was submitted to (like `{{othercluster}}`).
+
+For detailed information about `module` commands, read the [running batch jobs](running_batch_jobs.md) chapter.
+
+### Wait for job to be executed
+
+Your job is put into a queue before being executed, so it may take a while before it actually starts.
+(see [when will my job start?](running_batch_jobs.md#when-will-my-job-start) for scheduling policy).
+
+You can get an overview of the active jobs using the `qstat` command:
+<pre><code>$ qstat
+Job ID     Name             User            Time Use S Queue
+---------- ---------------- --------------- -------- - -------
+{{jobid}}     run.sh           {{userid}}        0:00:00  <b style="color:orange">Q</b> {{othercluster}}
+</code></pre> 
+
+Eventually, after entering `qstat` again you should see that your job has started running:
+<pre><code>$ qstat
+Job ID     Name             User            Time Use S Queue
+---------- ---------------- --------------- -------- - -------
+{{jobid}}     run.sh           {{userid}}        0:00:01  <b style="color:green">R</b> {{othercluster}}
+</code></pre> 
+
+If you don't see your job in the output of the `qstat` command anymore, your job has likely completed.
+
+Read [this section](running_batch_jobs.md#monitoring-and-managing-your-jobs) on how to interpret the output.
+
+### Inspect your results
+
+When your job finishes it generates 2 output files:
+
+- One for normal output messages (*stdout* output channel).
+- One for warning and error messages (*stderr* output channel).
+
+By default, these files are located in the directory where you issued `qsub`.
+
+{%- if site == 'Gent' %}
+
+!!! Info
+
+    For more information about the stdout and stderr output channels, see this [section](linux-tutorial/beyond_the_basics.md#inputoutput).
+
+{%- endif %}
+
+In our example when running <code>ls</code> in the current directory you should see 2 new files:
+ 
+- **run.sh.o{{jobid}}**, containing *normal output messages* produced by job {{jobid}};
+- **run.sh.e{{jobid}}**, containing *errors and warnings* produced by job {{jobid}}.
+
+!!! Info
+    
+    run.sh.e{{jobid}} should be empty (no errors or warnings).
+
+!!! Warning "Use your own job ID"
+
+    Replace <b>{{jobid}}</b> with the jobid you got from the `qstat` command (see above) or simply look for added files in your current directory by running `ls`.
+
+When examining the contents of ``run.sh.o{{jobid}}`` you will see something like this:
+```
+Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
+11493376/11490434 [==============================] - 1s 0us/step
+Epoch 1/5
+1875/1875 [==============================] - 2s 823us/step - loss: 0.2960 - accuracy: 0.9133
+Epoch 2/5
+1875/1875 [==============================] - 1s 771us/step - loss: 0.1427 - accuracy: 0.9571
+Epoch 3/5
+1875/1875 [==============================] - 1s 767us/step - loss: 0.1070 - accuracy: 0.9675
+Epoch 4/5
+1875/1875 [==============================] - 1s 764us/step - loss: 0.0881 - accuracy: 0.9727
+Epoch 5/5
+1875/1875 [==============================] - 1s 764us/step - loss: 0.0741 - accuracy: 0.9768
+313/313 - 0s - loss: 0.0782 - accuracy: 0.9764
+```
+
+Hurray 🎉, we trained a deep learning model and achieved 97.64 percent accuracy.
+
+!!! Warning
+
+    When using TensorFlow specifically, you should actually submit jobs to a GPU cluster for better performance, see [GPU clusters](gpu.md).
+
+    For the purpose of this example, we are running a very small TensorFlow workload on a CPU-only cluster.
+
+### Next steps
+
+- [Running interactive jobs](running_interactive_jobs.md)
+- [Running jobs with input/output data](running_jobs_with_input_output_data.md)
+- [Multi core jobs/Parallel Computing](multi_core_jobs.md)
+- [Interactive and debug cluster](interactive_debug.md#interactive-and-debug-cluster)
+
+For more examples see [Program examples](program_examples.md) and [Job script examples](jobscript_examples.md)
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt
new file mode 100644
index 00000000000..f95191b96f0
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt	
@@ -0,0 +1,25 @@
+
+To get access to the HPC-UGent infrastructure, visit Getting an HPC Account[1].
+
+If you have not used Linux before, 
+now would be a good time to follow our Linux Tutorial[2].
+
+#### A typical workflow looks like this:
+
+1.  Connect to the login nodes 
+2.  Transfer your files to the HPC-UGent infrastructure
+3.  Optional: compile your code and test it 
+4.  Create a job script and submit your job
+5.  Wait for job to be executed
+6.  Study the results generated by your jobs, either on the cluster or
+    after downloading them locally.
+
+We will walk through an illustrative workload to get you started. In this example, our objective is to train a deep learning model for recognizing hand-written digits (MNIST dataset) using TensorFlow[3];
+see the example scripts[4].
+
+[1]: account.md
+[2]: linux-tutorial/index.md
+[3]: https://www.tensorflow.org/
+[4]: https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist
+
+reference: docs.hpc.ugent.be/getting_started_copy/#getting-access
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt
new file mode 100644
index 00000000000..94f17ac5070
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt	
@@ -0,0 +1,19 @@
+
+There are two options to connect
+
+- Using a terminal to connect via SSH (for power users) (see First Time connection to the HPC-UGent infrastructure[1])
+- Using the web portal[2]
+
+Considering your operating system is **{{OS}}**, 
+
+
+!!! Info
+
+    When having problems see the connection issues section on the troubleshooting page[3].
+
+
+[1]: connecting.md#first-time-connection-to-the-hpc-infrastructure
+[2]: web_portal.md
+[3]: troubleshooting.md#sec:connecting-issues
+
+reference: docs.hpc.ugent.be/getting_started_copy/#getting-connected
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt
new file mode 100644
index 00000000000..3403b57f2c2
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt	
@@ -0,0 +1,11 @@
+
+Welcome to the "Getting Started" guide. This chapter will lead you through the initial steps of logging into the HPC-UGent infrastructure and submitting your very first job. We'll also walk you through the process step by step using a practical example.
+
+In addition to this chapter, you might find the recording of the *Introduction to HPC-UGent* training session[1] to be a useful resource.
+
+Before proceeding, read the introduction to HPC[2] to gain an understanding of the HPC-UGent infrastructure and related terminology.
+
+[1]: https://www.ugent.be/hpc/en/training/introhpcugent-recording
+[2]: introduction.md
+
+reference: docs.hpc.ugent.be/getting_started_copy/#getting-started
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt
new file mode 100644
index 00000000000..417416007f5
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt	
@@ -0,0 +1,56 @@
+
+When your job finishes it generates 2 output files:
+
+- One for normal output messages (*stdout* output channel).
+- One for warning and error messages (*stderr* output channel).
+
+By default located in the directory where you issued `qsub`.
+
+
+!!! Info
+
+    For more information about the stdout and stderr output channels, see this section[1].
+
+
+In our example when running <code>ls</code> in the current directory you should see 2 new files:
+ 
+- **run.sh.o{{jobid}}**, containing *normal output messages* produced by job {{jobid}};
+- **run.sh.e{{jobid}}**, containing *errors and warnings* produced by job {{jobid}}.
+
+!!! Info
+    
+    run.sh.e{{jobid}} should be empty (no errors or warnings).
+
+!!! Warning "Use your own job ID"
+
+    Replace <b>{{jobid}}</b> with the jobid you got from the `qstat` command (see above) or simply look for added files in your current directory by running `ls`.
+
+When examining the contents of ``run.sh.o{{jobid}}`` you will see something like this:
+```
+Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
+11493376/11490434 [==============================] - 1s 0us/step
+Epoch 1/5
+1875/1875 [==============================] - 2s 823us/step - loss: 0.2960 - accuracy: 0.9133
+Epoch 2/5
+1875/1875 [==============================] - 1s 771us/step - loss: 0.1427 - accuracy: 0.9571
+Epoch 3/5
+1875/1875 [==============================] - 1s 767us/step - loss: 0.1070 - accuracy: 0.9675
+Epoch 4/5
+1875/1875 [==============================] - 1s 764us/step - loss: 0.0881 - accuracy: 0.9727
+Epoch 5/5
+1875/1875 [==============================] - 1s 764us/step - loss: 0.0741 - accuracy: 0.9768
+313/313 - 0s - loss: 0.0782 - accuracy: 0.9764
+```
+
+Hurray 🎉, we trained a deep learning model and achieved 97,64 percent accuracy.
+
+!!! Warning
+
+    When using TensorFlow specifically, you should actually submit jobs to a GPU cluster for better performance, see GPU clusters[2].
+
+    For the purpose of this example, we are running a very small TensorFlow workload on a CPU-only cluster.
+
+[1]: linux-tutorial/beyond_the_basics.md#inputoutput
+[2]: gpu.md
+
+reference: docs.hpc.ugent.be/getting_started_copy/#inspect-your-results
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt
new file mode 100644
index 00000000000..804b56b8251
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt	
@@ -0,0 +1,15 @@
+
+- Running interactive jobs[1]
+- Running jobs with input/output data[2]
+- Multi core jobs/Parallel Computing[3]
+- Interactive and debug cluster[4]
+
+For more examples see Program examples[5] and Job script examples[6]
+[1]: running_interactive_jobs.md
+[2]: running_jobs_with_input_output_data.md
+[3]: multi_core_jobs.md
+[4]: interactive_debug.md#interactive-and-debug-cluster
+[5]: program_examples.md
+[6]: jobscript_examples.md
+
+reference: docs.hpc.ugent.be/getting_started_copy/#next-steps
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt
new file mode 100644
index 00000000000..edb336fa06b
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt	
@@ -0,0 +1,60 @@
+
+Jobs are submitted and executed using job scripts. In our case **run.sh** can be used as a (very minimal) job script.
+
+A job script is a shell script, a text file that specifies the resources, 
+the software that is used (via `module load` statements), 
+and the steps that should be executed to run the calculation.
+
+Our job script looks like this:
+
+<center>-- run.sh --</center>
+
+```bash
+#!/bin/bash
+
+module load TensorFlow/2.11.0-foss-2022a
+
+python tensorflow_mnist.py
+
+```
+<sub>As you can see this job script will run the Python script named **tensorflow_mnist.py**.</sub>
+
+
+The jobs you submit are by default executed on **cluster/{{defaultcluster}}**; you can swap to another cluster by issuing the following command.
+
+```shell
+module swap cluster/{{othercluster}}
+```
+
+!!! Tip
+    
+    When submitting jobs with limited amount of resources, it is recommended to use the debug/interactive cluster[1]: `donphan`. 
+
+
+    To get a list of all clusters and their hardware, see <https://www.ugent.be/hpc/en/infrastructure>.
+
+
+This job script can now be submitted to the cluster's job system for execution, using the qsub (**q**ueue **sub**mit) command:
+
+```shell
+$ qsub run.sh
+{{jobid}}
+```
+
+This command returns a job identifier (*{{jobid}}*) on the HPC cluster. This is a unique identifier for the job which can be used to monitor and manage your job.
+
+!!! Warning "Make sure you understand what the `module` command does"
+ 
+    Note that the module commands only modify environment variables. For instance, running `module swap cluster/{{othercluster}}` will update your shell environment so that `qsub` submits a job to the `{{othercluster}}` cluster, 
+    but our active shell session is still running on the login node.
+    
+    It is important to understand that while `module` commands affect your session environment, they do ***not*** change where the commands you are running are being executed: they will still be run on the login node you are on.
+    
+    When you submit a job script however, the commands ***in*** the job script will be run on a workernode of the cluster the job was submitted to (like `{{othercluster}}`).
+
+For detailed information about `module` commands, read the running batch jobs[2] chapter.
+
+[1]: interactive_debug.md#interactive-and-debug-cluster
+[2]: running_batch_jobs.md
+
+reference: docs.hpc.ugent.be/getting_started_copy/#submitting-a-job
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt
new file mode 100644
index 00000000000..94dc30f6712
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt	
@@ -0,0 +1,21 @@
+
+Now that you can login, it is time to transfer files from your local computer to your **home directory** on the HPC-UGent infrastructure.
+
+Download tensorflow_mnist.py[1] 
+and run.sh[2] example scripts to your computer (from here[3]).
+
+
+When running `ls` in your session on the HPC-UGent infrastructure, you should see the two files listed in your home directory (`~`):
+
+```shell
+$ ls ~
+run.sh tensorflow_mnist.py
+```
+
+When you do not see these files, make sure you uploaded the files to your **home directory**.
+
+[1]: https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py
+[2]: https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh
+[3]: https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist
+
+reference: docs.hpc.ugent.be/getting_started_copy/#transfer-your-files
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt
new file mode 100644
index 00000000000..de177946cf9
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt	
@@ -0,0 +1,26 @@
+
+Your job is put into a queue before being executed, so it may take a while before it actually starts.
+(see when will my job start?[1] for scheduling policy).
+
+You can get an overview of the active jobs using the `qstat` command:
+<pre><code>$ qstat
+Job ID     Name             User            Time Use S Queue
+---------- ---------------- --------------- -------- - -------
+{{jobid}}     run.sh           {{userid}}        0:00:00  <b style="color:orange">Q</b> {{othercluster}}
+</code></pre> 
+
+Eventually, after entering `qstat` again you should see that your job has started running:
+<pre><code>$ qstat
+Job ID     Name             User            Time Use S Queue
+---------- ---------------- --------------- -------- - -------
+{{jobid}}     run.sh           {{userid}}        0:00:01  <b style="color:green">R</b> {{othercluster}}
+</code></pre> 
+
+If you don't see your job in the output of the `qstat` command anymore, your job has likely completed.
+
+Read this section[2] on how to interpret the output.
+
+[1]: running_batch_jobs.md#when-will-my-job-start
+[2]: running_batch_jobs.md#monitoring-and-managing-your-jobs
+
+reference: docs.hpc.ugent.be/getting_started_copy/#wait-for-job-to-be-executed
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt
new file mode 100644
index 00000000000..e756b9a3cbe
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-access
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt
new file mode 100644
index 00000000000..bac5dfcbfbe
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt	
@@ -0,0 +1,18 @@
+it is recommended to make use of the `ssh` command in a terminal to get the most flexibility. 
+
+Assuming you have already generated SSH keys in the previous step (Getting Access[1]), and that they are in a default location, you should now be able to login by running the following command:
+
+<pre><code>ssh {{userid}}@{{loginnode}}</code></pre>
+
+!!! Warning "Use your own VSC account id"
+    
+    Replace <b>{{userid}}</b> with your VSC account id (see <https://account.vscentrum.be>)
+
+!!! Tip
+
+    You can also still use the web portal (see shell access on web portal[2])
+
+[1]: #getting-access
+[2]: web_portal.md#shell-access
+
+reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-connected
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt
new file mode 100644
index 00000000000..f0b9d83bed3
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-started
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt
new file mode 100644
index 00000000000..441b54c7042
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/Linux/getting_started_copy/#inspect-your-results
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt
new file mode 100644
index 00000000000..d72ffccf01a
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/Linux/getting_started_copy/#next-steps
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt
new file mode 100644
index 00000000000..744c2c3db7a
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/Linux/getting_started_copy/#submitting-a-job
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt
new file mode 100644
index 00000000000..aca6e05d28c
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt	
@@ -0,0 +1,21 @@
+
+On your local machine you can run:
+<pre><code>curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py
+curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh
+</code></pre>
+
+Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC).
+<pre><code>scp tensorflow_mnist.py run.sh {{userid}}@{{ loginnode }}:~ </code></pre>
+<pre><code>ssh  {{userid}}@{{ loginnode }} </code></pre>
+
+!!! Warning "Use your own VSC account id"
+    
+    Replace <b>{{userid}}</b> with your VSC account id (see <https://account.vscentrum.be>)
+
+!!! Info
+
+    For more information about transferring files or `scp`, see transfer files from/to hpc[1].
+
+[1]: connecting.md#transfer-files-tofrom-the-hpc
+
+reference: docs.hpc.ugent.be/Linux/getting_started_copy/#transfer-your-files
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt
new file mode 100644
index 00000000000..93e6fdff171
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/Linux/getting_started_copy/#wait-for-job-to-be-executed
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt
new file mode 100644
index 00000000000..8732e586981
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-access
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt
new file mode 100644
index 00000000000..2b1de2be838
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt	
@@ -0,0 +1,13 @@
+it should be easy to make use of the `ssh` command in a terminal, but the web portal will work too. 
+The web portal[1] offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required).
+
+See shell access[2] when using the web portal, or
+connection to the HPC-UGent infrastructure[3] when using a terminal.
+
+Make sure you can get to a shell access to the HPC-UGent infrastructure before proceeding with the next steps.
+
+[1]: web_portal.md
+[2]: web_portal.md#shell-access
+[3]: connecting.md#first-time-connection-to-the-hpc-infrastructure
+
+reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-connected
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt
new file mode 100644
index 00000000000..4e60f862a0a
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-started
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt
new file mode 100644
index 00000000000..f7ae9f96226
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/macOS/getting_started_copy/#inspect-your-results
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt
new file mode 100644
index 00000000000..71f384bcf17
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/macOS/getting_started_copy/#next-steps
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt
new file mode 100644
index 00000000000..d72ba48195a
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/macOS/getting_started_copy/#submitting-a-job
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt
new file mode 100644
index 00000000000..fce05042ab2
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt	
@@ -0,0 +1,21 @@
+
+On your local machine you can run:
+<pre><code>curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py
+curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh
+</code></pre>
+
+Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC).
+<pre><code>scp tensorflow_mnist.py run.sh {{userid}}{{ loginnode }}:~ </code></pre>
+<pre><code>ssh  {{userid}}@{{ loginnode }} </code></pre>
+
+!!! Warning "User your own VSC account id"
+    
+    Replace <b>{{userid}}</b> with your VSC account id (see <https://account.vscentrum.be>)
+
+!!! Info
+
+    For more information about transfering files or `scp`, see tranfer files from/to hpc[1].
+
+[1]: connecting.md#transfer-files-tofrom-the-hpc
+
+reference: docs.hpc.ugent.be/macOS/getting_started_copy/#transfer-your-files
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt
new file mode 100644
index 00000000000..2ef8770504b
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/macOS/getting_started_copy/#wait-for-job-to-be-executed
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt
new file mode 100644
index 00000000000..874af365704
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-access
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt
new file mode 100644
index 00000000000..ce0b873b2b0
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt	
@@ -0,0 +1,13 @@
+it is recommended to use the web portal.
+The web portal[1] offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required).
+
+See shell access[2] when using the web portal, or
+connection to the HPC-UGent infrastructure[3] when using a terminal.
+
+Make sure you can get to a shell access to the HPC-UGent infrastructure before proceeding with the next steps.
+
+[1]: web_portal.md
+[2]: web_portal.md#shell-access
+[3]: connecting.md#first-time-connection-to-the-hpc-infrastructure
+
+reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-connected
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt
new file mode 100644
index 00000000000..44d1f17b73b
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-started
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt
new file mode 100644
index 00000000000..730fbbc3b74
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/Windows/getting_started_copy/#inspect-your-results
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt
new file mode 100644
index 00000000000..55df915125a
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/Windows/getting_started_copy/#next-steps
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt
new file mode 100644
index 00000000000..f67d48ece4a
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/Windows/getting_started_copy/#submitting-a-job
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt
new file mode 100644
index 00000000000..dce86fc7cf3
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt	
@@ -0,0 +1,15 @@
+
+The HPC-UGent web portal[1] provides a file browser that allows uploading files.
+For more information see the file browser section[2].
+
+Upload both files (`run.sh` and `tensorflow-mnist.py`) to your **home directory** and go back to your shell.
+
+!!! Info
+
+    As an alternative, you can use WinSCP (see our section[3])
+
+[1]: https://login.hpc.ugent.be
+[2]: web_portal.md#file-browser
+[3]: connecting.md#winscp
+
+reference: docs.hpc.ugent.be/Windows/getting_started_copy/#transfer-your-files
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt
new file mode 100644
index 00000000000..bdd7387e379
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt	
@@ -0,0 +1,2 @@
+
+reference: docs.hpc.ugent.be/Windows/getting_started_copy/#wait-for-job-to-be-executed
diff --git a/scripts/HPC chatbot preprocessor/jinja_parser.py b/scripts/HPC chatbot preprocessor/jinja_parser.py
new file mode 100644
index 00000000000..d9fd8f1c5ce
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/jinja_parser.py	
@@ -0,0 +1,3 @@
+from jinja2 import Template
+
+# I shall do this tomorrow, I do not find myself in the possession of enough understanding about jinja to commence with this task today
diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py
index 7f74fb12fb2..0331e61cfc5 100644
--- a/scripts/HPC chatbot preprocessor/main.py	
+++ b/scripts/HPC chatbot preprocessor/main.py	
@@ -22,9 +22,13 @@
 
 # Test with actual document
 
+# make a copies directory to store the copies
+if not os.path.exists(".\\copies"):
+    os.mkdir(".\\copies")
+
 # make a copy of one of the md files to test some things
-shutil.copyfile("C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC\\getting_started.md",
-                "C:\\HPC_werk\\Chatbot\\getting_started_copy.md")
+shutil.copyfile("..\\..\\mkdocs\\docs\\HPC\\getting_started.md",
+                ".\\copies\\getting_started_copy.md")
 
 ################### define global variables ###################
 # variable for the filename (which will be changed into something else in the final version)
@@ -34,10 +38,10 @@
 main_title = filename[:-3]
 
 # variable that keeps track of the directories that are used to write in at different levels
-root_dir_generic = "C:\\HPC_werk\\Chatbot\\parsed_mds\\generic\\"
-root_dir_os_specific_linux = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\linux\\"
-root_dir_os_specific_windows = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\windows\\"
-root_dir_os_specific_macos = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\macos\\"
+root_dir_generic = ".\\copies\\parsed_mds\\generic\\"
+root_dir_os_specific_linux = ".\\copies\\parsed_mds\\os_specific\\linux\\"
+root_dir_os_specific_windows = ".\\copies\\parsed_mds\\os_specific\\windows\\"
+root_dir_os_specific_macos = ".\\copies\\parsed_mds\\os_specific\\macos\\"
 curr_dirs = [filename[:-3] for i in range(4)]
 
 # variable to keep track whether we're dealing with OS-specific info or not
@@ -162,6 +166,12 @@ def replace_markdown_markers(curr_line, linklist):
             curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + str(len(linklist) + 1) + "]")
             linklist.append(match[1])
 
+    # TODO:
+    # code-blocks
+    # tips
+    # warnings
+    # etc
+
     return curr_line, linklist
 
 
@@ -331,7 +341,9 @@ def write_end_of_file(file_location, OS, linklist):
 remove_directory_tree(root_dir_os_specific_macos)
 
 # create directories for the source markdown file
+create_directory(".\\copies\\parsed_mds")
 create_directory(root_dir_generic)
+create_directory(".\\copies\\parsed_mds\\os_specific")
 create_directory(root_dir_os_specific_linux)
 create_directory(root_dir_os_specific_windows)
 create_directory(root_dir_os_specific_macos)
@@ -341,7 +353,7 @@ def write_end_of_file(file_location, OS, linklist):
 create_directory(root_dir_os_specific_macos + curr_dirs[0])
 
 # open the file and store line by line in the right file
-with open("C:\\HPC_werk\\Chatbot\\getting_started_copy.md", 'r') as readfile:
+with open(".\\copies\\" + filename, 'r') as readfile:
 
     for line in readfile:
         title_level, title, directory = check_for_title(line)
diff --git a/scripts/HPC chatbot preprocessor/start_checker.py b/scripts/HPC chatbot preprocessor/start_checker.py
index 50b61cd5213..5661c79ddc9 100644
--- a/scripts/HPC chatbot preprocessor/start_checker.py	
+++ b/scripts/HPC chatbot preprocessor/start_checker.py	
@@ -1,3 +1,5 @@
+# THIS IS NOT AN IMPORTANT FILE, DON'T WORRY ABOUT IT, I JUST USED IT TO TEST SOME THINGS
+
 import os
 
 directory = "C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC"

From 85a93ec31da14d3877658a741fd882184695b6cb Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 9 Aug 2024 10:39:56 +0200
Subject: [PATCH 003/152] used jinja to replace macros

---
 .../HPC chatbot preprocessor/if_mangler.py    |  47 +++++++
 .../HPC chatbot preprocessor/jinja_parser.py  |  24 +++-
 scripts/HPC chatbot preprocessor/main.py      | 124 ++++++------------
 3 files changed, 109 insertions(+), 86 deletions(-)
 create mode 100644 scripts/HPC chatbot preprocessor/if_mangler.py

diff --git a/scripts/HPC chatbot preprocessor/if_mangler.py b/scripts/HPC chatbot preprocessor/if_mangler.py
new file mode 100644
index 00000000000..f49ef691fd3
--- /dev/null
+++ b/scripts/HPC chatbot preprocessor/if_mangler.py	
@@ -0,0 +1,47 @@
+import re
+import os
+
+
+def create_directory(new_directory):
+    if not os.path.exists(new_directory):
+        os.mkdir(new_directory)
+
+create_directory(".\\if_mangled_files")
+
+# global variable to keep track of latest if-statement scope
+is_os = False
+
+
+def mangle_os_ifs(line):
+    global is_os
+
+    match = re.search(r'\{%-\s[^%]*%}', line)
+    if_match = re.search(r'\{%-\sif [^%]*%}', line)
+    if_os_match = re.search(r'\{%-\sif OS == [^%]*%}', line)
+
+    if match:
+        if if_match:
+            if if_os_match:
+                is_os = True
+                line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:]
+            else:
+                is_os = False
+        else:
+            if is_os:
+                line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:]
+
+    match = re.search(r'\{%-\s[^%]*%}', line)
+
+    while match and is_os:
+        line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:]
+        match = re.search(r'\{%-\s[^%]*%}', line)
+
+    return line
+
+
+def mangle_ifs(directory, file):
+    with open(".\\if_mangled_files\\" + file, 'w') as write_file:
+        with open(directory + "\\" + file, 'r') as read_file:
+            for line in read_file:
+                new_line = mangle_os_ifs(line)
+                write_file.write(new_line)
diff --git a/scripts/HPC chatbot preprocessor/jinja_parser.py b/scripts/HPC chatbot preprocessor/jinja_parser.py
index d9fd8f1c5ce..1b033bfdc6c 100644
--- a/scripts/HPC chatbot preprocessor/jinja_parser.py	
+++ b/scripts/HPC chatbot preprocessor/jinja_parser.py	
@@ -1,3 +1,25 @@
+import yaml
 from jinja2 import Template
+from if_mangler import mangle_ifs
 
-# I shall do this tomorrow, I do not find myself in the possession of enough understanding about jinja to commence with this task today
+
+# function that let's jinja do its thing to format the files expect for the os-related if-statements
+def jinja_parser(filename):
+    # Read the YAML file
+    with open('..\\..\\mkdocs\\extra\\gent.yml', 'r') as yml_file:
+        words_dict = yaml.safe_load(yml_file)
+
+    # Mangle the OS-related if-statements
+    mangle_ifs('.\\copies', filename)
+
+    # Read the if-mangled Markdown file
+    with open('.\\if_mangled_files\\' + filename, 'r') as md_file:
+        md_content = md_file.read()
+
+    # Use Jinja2 to replace the macros
+    template = Template(md_content)
+    rendered_content = template.render(words_dict)
+
+    # Save the rendered content to a new file
+    with open('.\\copies\\' + filename, 'w') as output_file:
+        output_file.write(rendered_content)
diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py
index 0331e61cfc5..35769de46ab 100644
--- a/scripts/HPC chatbot preprocessor/main.py	
+++ b/scripts/HPC chatbot preprocessor/main.py	
@@ -1,6 +1,7 @@
 import os
 import re
 import shutil
+from jinja_parser import jinja_parser
 
 # test_number = int(input("Which test should be run?"))
 #
@@ -26,6 +27,9 @@
 if not os.path.exists(".\\copies"):
     os.mkdir(".\\copies")
 
+if not os.path.exists(".\\parsed_mds"):
+    os.mkdir(".\\parsed_mds")
+
 # make a copy of one of the md files to test some things
 shutil.copyfile("..\\..\\mkdocs\\docs\\HPC\\getting_started.md",
                 ".\\copies\\getting_started_copy.md")
@@ -38,10 +42,10 @@
 main_title = filename[:-3]
 
 # variable that keeps track of the directories that are used to write in at different levels
-root_dir_generic = ".\\copies\\parsed_mds\\generic\\"
-root_dir_os_specific_linux = ".\\copies\\parsed_mds\\os_specific\\linux\\"
-root_dir_os_specific_windows = ".\\copies\\parsed_mds\\os_specific\\windows\\"
-root_dir_os_specific_macos = ".\\copies\\parsed_mds\\os_specific\\macos\\"
+root_dir_generic = ".\\parsed_mds\\generic\\"
+root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\"
+root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\"
+root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\"
 curr_dirs = [filename[:-3] for i in range(4)]
 
 # variable to keep track whether we're dealing with OS-specific info or not
@@ -62,17 +66,10 @@
 links_windows = []
 links_macos = []
 
-# dictionaries to keep track of current OS and location
+# dictionaries to keep track of current OS
 active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"}
-active_site_if_states = {"Gent": "inactive", "not-Gent": "inactive"}
-
-# variable to keep track of the type of if-statement
-if_type = "OS"
-
-# variable to keep track of the macro-replacements at the top of markdown files
-replacements = {}
 
-# variable that is used to detect whether the first title has been encountered yet
+# variable that shows whether the first title has been reached yet
 after_first_title = False
 
 
@@ -84,13 +81,6 @@ def remove_directory_tree(old_directory):
         shutil.rmtree(old_directory)
 
 
-# function that checks the first lines of a file until a title is found and saves the macro-replacements to the list
-def save_replacements(curr_line):
-    global replacements
-    match = re.search(r'\{% set (.*?)="(.*?)" %}', curr_line)
-    replacements[match.group(1)] = match.group(2)
-
-
 # function that checks whether the current line has a title of level 3 at maximum (returns the level of the title or 0 if the line is not a title)
 def check_for_title_logic(curr_line):
     global curr_dirs
@@ -151,13 +141,6 @@ def update_lower_curr_dir(curr_directory, level):
 # function that replaces certain markdown structures with the equivalent used on the website
 def replace_markdown_markers(curr_line, linklist):
 
-    # replace {{hpcinfra}}
-    curr_line = re.sub(r'\{\{\s*hpcinfra\s*}}', "HPC-UGent infrastructure", curr_line)
-
-    # replace other replacement macros
-    for macro in replacements.keys():
-        curr_line = re.sub(r'\{\{\s*' + re.escape(macro) + r'\s*}}', replacements[macro], curr_line)
-
     # replace links with a reference
     matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line)
     if matches:
@@ -177,13 +160,12 @@ def replace_markdown_markers(curr_line, linklist):
 
 # function that checks for if-statements
 def check_if_statements(curr_line):
-    global if_type
 
     # check whether the first part of the line contains information wrt if-statements
-    match = re.search(r'^\{%-\s([^%]*)%}(.*)', curr_line)
+    match = re.search(r'^\{-if-%-\s([^%]*)%-if-}(.*)', curr_line)
 
     # check whether the line contains information wrt if-statements that is not in its first part
-    match_large = re.search(r'^(.*)(\{%-\s[^%]*%})(.*)', curr_line)
+    match_large = re.search(r'^(.*)(\{-if-%-\s[^%]*%-if-})(.*)', curr_line)
 
     if match:
         print("################################################################################")
@@ -202,34 +184,16 @@ def check_if_statements(curr_line):
                 if other_OS != OS and active_OS_if_states[other_OS] == "active":
                     active_OS_if_states[other_OS] = "inactive"
 
-            if_type = "OS"
-
-        # new if-statement wrt site
-        elif re.match(r'if site == ', content):
-            if re.search(r'(?i)gent', content):
-                active_site_if_states["Gent"] = "active"
-                active_site_if_states["not-Gent"] = "inactive"
-            else:
-                active_site_if_states["not-Gent"] = "active"
-                if active_site_if_states["Gent"] == "active":
-                    active_site_if_states["Gent"] = "inactive"
-            if_type = "site"
-
         # endif statement wrt OS
-        elif re.match(r'endif ', content) and if_type == "OS":
+        elif re.match(r'endif ', content):
             if str(1) in active_OS_if_states.values():
                 active_OS_if_states[list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active"
             else:
                 for key in active_OS_if_states.keys():
                     active_OS_if_states[key] = "inactive"
 
-        # endif statement wrt site
-        elif re.match(r'endif ', content) and if_type == "site":
-            for key in active_site_if_states.keys():
-                active_site_if_states[key] = "inactive"
-
         # else statement wrt OS
-        elif re.match(r'else ', content) and if_type == "OS":
+        elif re.match(r'else ', content):
 
             i = 0
             for i in range(3):
@@ -246,18 +210,7 @@ def check_if_statements(curr_line):
                 position = list(active_OS_if_states.values()).index("inactive")
                 active_OS_if_states[key_list[position]] = "active"
 
-        # else statement wrt site
-        elif re.match(r'else ', content) and if_type == "site":
-
-            # change state of "Gent" and set not-Gent on active
-            if active_site_if_states["Gent"] == "inactive":
-                active_site_if_states["Gent"] = "active"
-            elif active_site_if_states["Gent"] == "active":
-                active_site_if_states["Gent"] = str(0)
-            active_site_if_states["not-Gent"] = "active"
-
         print(active_OS_if_states)
-        print(active_site_if_states)
 
         if len(match.group(2)) != 0:
             extra_message = match.group(2).lstrip()
@@ -273,7 +226,6 @@ def check_if_statements(curr_line):
     elif match_large:
         print("################################################################################")
         print(active_OS_if_states)
-        print(active_site_if_states)
         print(match_large.group(1))
         print(match_large.group(2))
         print("write_text_and_check_extra_message")
@@ -301,15 +253,14 @@ def write_text_to_file(file_name, curr_line):
 # function that decides what file to write text to
 def choose_and_write_to_file(curr_line):
     # check that the line is part of the website for gent
-    if active_site_if_states["Gent"] == "active" or active_site_if_states["Gent"] == "inactive" and active_site_if_states["not-Gent"] == "inactive":
-        if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive":
-            write_text_to_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", curr_line)
-        if active_OS_if_states["linux"] == "active":
-            write_text_to_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", curr_line)
-        if active_OS_if_states["windows"] == "active":
-            write_text_to_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", curr_line)
-        if active_OS_if_states["macos"] == "active":
-            write_text_to_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", curr_line)
+    if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive":
+        write_text_to_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", curr_line)
+    if active_OS_if_states["linux"] == "active":
+        write_text_to_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", curr_line)
+    if active_OS_if_states["windows"] == "active":
+        write_text_to_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", curr_line)
+    if active_OS_if_states["macos"] == "active":
+        write_text_to_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", curr_line)
 
 
 # function that adds a reference link at the end of every txt file
@@ -325,6 +276,7 @@ def write_end_of_file(file_location, OS, linklist):
 
     # add the links from within the document
     with open(file_location, 'a') as write_file:
+        write_file.write("\n\n")
         for i, link in enumerate(linklist):
             write_file.write("[" + str(i + 1) + "]: " + str(link) + "\n")
 
@@ -341,9 +293,8 @@ def write_end_of_file(file_location, OS, linklist):
 remove_directory_tree(root_dir_os_specific_macos)
 
 # create directories for the source markdown file
-create_directory(".\\copies\\parsed_mds")
 create_directory(root_dir_generic)
-create_directory(".\\copies\\parsed_mds\\os_specific")
+create_directory(".\\parsed_mds\\os_specific")
 create_directory(root_dir_os_specific_linux)
 create_directory(root_dir_os_specific_windows)
 create_directory(root_dir_os_specific_macos)
@@ -352,6 +303,9 @@ def write_end_of_file(file_location, OS, linklist):
 create_directory(root_dir_os_specific_windows + curr_dirs[0])
 create_directory(root_dir_os_specific_macos + curr_dirs[0])
 
+# process the jinja macros
+jinja_parser(filename)
+
 # open the file and store line by line in the right file
 with open(".\\copies\\" + filename, 'r') as readfile:
 
@@ -366,22 +320,22 @@ def write_end_of_file(file_location, OS, linklist):
             after_first_title = True
 
         # line is not a title
-        else:
-            if after_first_title:
-                # check for if-statements and write the appropriate lines in the right files
-                next_action = check_if_statements(line)
-                while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message":
-                    if next_action[0] == "write_text_and_check_extra_message":
-                        choose_and_write_to_file(next_action[2])
-                    next_action = check_if_statements(next_action[1])
-
-                if next_action[0] == "write_text":
+        elif after_first_title:
+            # check for if-statements and write the appropriate lines in the right files
+            next_action = check_if_statements(line)
+            while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message":
+                if next_action[0] == "write_text_and_check_extra_message":
                     choose_and_write_to_file(next_action[2])
-            else:
-                save_replacements(line)
+                next_action = check_if_statements(next_action[1])
+
+            if next_action[0] == "write_text":
+                choose_and_write_to_file(next_action[2])
 
 # write end of file for the last file
 write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic)
 write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux)
 write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", links_windows)
 write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos)
+
+
+# TODO: directory cleanup

From dfff5fabae20307d13cef4f80d22943f7eac87f1 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 9 Aug 2024 14:34:01 +0200
Subject: [PATCH 004/152] adapt if-mangler to accommodate for nested if-clauses

---
 .../HPC chatbot preprocessor/if_mangler.py    | 64 +++++++++++--------
 1 file changed, 38 insertions(+), 26 deletions(-)

diff --git a/scripts/HPC chatbot preprocessor/if_mangler.py b/scripts/HPC chatbot preprocessor/if_mangler.py
index f49ef691fd3..9980a2e83e8 100644
--- a/scripts/HPC chatbot preprocessor/if_mangler.py	
+++ b/scripts/HPC chatbot preprocessor/if_mangler.py	
@@ -1,41 +1,53 @@
 import re
-import os
-
-
-def create_directory(new_directory):
-    if not os.path.exists(new_directory):
-        os.mkdir(new_directory)
-
-create_directory(".\\if_mangled_files")
 
 # global variable to keep track of latest if-statement scope
-is_os = False
+is_os = 0 # Can be 0, 1 or 2 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if}
 
 
 def mangle_os_ifs(line):
     global is_os
 
-    match = re.search(r'\{%-\s[^%]*%}', line)
-    if_match = re.search(r'\{%-\sif [^%]*%}', line)
-    if_os_match = re.search(r'\{%-\sif OS == [^%]*%}', line)
+    match = re.search(r'\{%(.*?)%}(.*)', line)
 
-    if match:
-        if if_match:
-            if if_os_match:
-                is_os = True
-                line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:]
-            else:
-                is_os = False
-        else:
-            if is_os:
-                line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:]
+    start_index = 0
+    added_length = 0
 
-    match = re.search(r'\{%-\s[^%]*%}', line)
+    while match:
 
-    while match and is_os:
-        line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:]
-        match = re.search(r'\{%-\s[^%]*%}', line)
+        constr_match = re.search(r'\{%.*?%}', match.string)
+        if_match = re.search(r'if ', match.group(1))
+        if_os_match = re.search(r'if OS == ', match.group(1))
+        endif_match = re.search(r'endif', match.group(1))
 
+        if endif_match:
+            if is_os == 2:
+                line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
+                                                                                               constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
+                                                                                                                                                                                                                     constr_match.end() + start_index + added_length - 1:]
+                added_length += 8
+                is_os = 0
+            elif is_os == 1:
+                is_os = 2
+        elif if_match:
+            if if_os_match:
+                line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
+                                                                                               constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
+                                                                                                                                                                                                                     constr_match.end() + start_index + added_length - 1:]
+                added_length += 8
+                is_os = 2
+            else:
+                if is_os == 2:
+                    is_os = 1
+                else:
+                    is_os = 0
+        else:
+            if is_os == 2:
+                line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
+                                                                                               constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
+                                                                                                                                                                                                                     constr_match.end() + start_index + added_length - 1:]
+                added_length += 8
+        start_index += constr_match.end()
+        match = re.search(r'\{%(.*?)%}(.*)', match.group(2))
     return line
 
 

From 649ddec3fcad3655445aa930f75bf4dd82a9504f Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 9 Aug 2024 14:34:43 +0200
Subject: [PATCH 005/152] adapt the parser to take all files as input; not all
 files get parsed successfully yet

---
 scripts/HPC chatbot preprocessor/main.py | 282 ++++++++++++-----------
 1 file changed, 150 insertions(+), 132 deletions(-)

diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py
index 35769de46ab..86bd2ed9c3f 100644
--- a/scripts/HPC chatbot preprocessor/main.py	
+++ b/scripts/HPC chatbot preprocessor/main.py	
@@ -3,78 +3,48 @@
 import shutil
 from jinja_parser import jinja_parser
 
-# test_number = int(input("Which test should be run?"))
-#
-# # Test for strip_markdown (somewhat successful, see findings file)
-#
-# if test_number == 1:
-#     import strip_markdown
-#
-#     strip_markdown.strip_markdown_file("C:\\HPC werk\\Chatbot\\md_to_plaintext_test.md")
-#
-# # Test if copy of document doesn't change original document (successful)
-# if test_number == 2:
-#     import shutil
-#
-#     shutil.copyfile("C:\\HPC_werk\\Chatbot\\md_to_plaintext_test.txt",
-#                     "C:\\HPC_werk\\Chatbot\\md_to_plaintext_test_copy.txt")
-#     with open("C:\\HPC_werk\\Chatbot\\md_to_plaintext_test_copy.txt", 'w') as file:
-#         file.write('hello')
-
-# Test with actual document
-
-# make a copies directory to store the copies
+# variables for analytics
+succeeded = 0
+failed = 0
+
+# make the necessary directories
 if not os.path.exists(".\\copies"):
     os.mkdir(".\\copies")
 
 if not os.path.exists(".\\parsed_mds"):
     os.mkdir(".\\parsed_mds")
 
-# make a copy of one of the md files to test some things
-shutil.copyfile("..\\..\\mkdocs\\docs\\HPC\\getting_started.md",
-                ".\\copies\\getting_started_copy.md")
+if not os.path.exists(".\\if_mangled_files"):
+    os.mkdir(".\\if_mangled_files")
+
+# copy the examples to the right location wrt the script in order to allow jinja to work
+if not os.path.exists(".\\examples"):
+    shutil.copytree("..\\..\\mkdocs\\docs\\HPC\\examples", ".\\examples")
 
 ################### define global variables ###################
-# variable for the filename (which will be changed into something else in the final version)
-filename = "getting_started_copy.md"
 
-# variable for the main title (needed for reference links)
-main_title = filename[:-3]
+# variable that keeps track of the source directories
+source_directories = ["..\\..\\mkdocs\\docs\\HPC\\", "..\\..\\mkdocs\\docs\\HPC\\linux-tutorial"]
 
 # variable that keeps track of the directories that are used to write in at different levels
 root_dir_generic = ".\\parsed_mds\\generic\\"
 root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\"
 root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\"
 root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\"
-curr_dirs = [filename[:-3] for i in range(4)]
-
-# variable to keep track whether we're dealing with OS-specific info or not
-OS_specific = False
-
-# pattern for the regex if-statement to filter out markdown titles
-if_pattern = r'^#+ '
-
-# variable that keeps track of the latest non-zero level title and corresponding directory
-last_title_level = 1
-last_title = None
-last_directory = None
-last_was_title = False
 
-# list to keep track of links in the text
-links_generic = []
-links_linux = []
-links_windows = []
-links_macos = []
+# list of all the filenames
+filenames = {}
+for source_directory in source_directories:
+    all_items = os.listdir(source_directory)
+    files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
+    for file in files:
+        filenames[file] = os.path.join(source_directory, file)
 
-# dictionaries to keep track of current OS
-active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"}
 
-# variable that shows whether the first title has been reached yet
-after_first_title = False
+# filenames = {'account.md': '..\\..\\mkdocs\\docs\\HPC\\account.md'}
 
 
 ################### define functions ###################
-
 # function that removes the previous file structure before starting the process of making a new one
 def remove_directory_tree(old_directory):
     if os.path.exists(old_directory):
@@ -84,7 +54,7 @@ def remove_directory_tree(old_directory):
 # function that checks whether the current line has a title of level 3 at maximum (returns the level of the title or 0 if the line is not a title)
 def check_for_title_logic(curr_line):
     global curr_dirs
-    match = re.match(if_pattern, curr_line)
+    match = re.match(r'^#+ ', curr_line)
     if match and len(match.group(0)) <= 4:
         return len(match.group(0)) - 1
     else:
@@ -109,12 +79,16 @@ def check_for_title(curr_line):
     else:
         if last_title is not None:
             write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic)
-            write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux)
-            write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", links_windows)
-            write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos)
+            write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux",
+                              links_linux)
+            write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows",
+                              links_windows)
+            write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS",
+                              links_macos)
             reset_link_lists()
 
-        curr_dirs[logic_output] = curr_dirs[logic_output - 1] + "\\" + curr_line[logic_output + 1:-1].replace(' ', '-')
+        curr_dirs[logic_output] = curr_dirs[logic_output - 1] + "\\" + make_valid_title(
+            curr_line[logic_output + 1:-1].replace(' ', '-'))
 
         create_directory(root_dir_generic + curr_dirs[logic_output])
         create_directory(root_dir_os_specific_linux + curr_dirs[logic_output])
@@ -122,7 +96,7 @@ def check_for_title(curr_line):
         create_directory(root_dir_os_specific_macos + curr_dirs[logic_output])
 
         update_lower_curr_dir(curr_dirs[logic_output], logic_output)
-        return logic_output, curr_line[logic_output + 1:-1].replace(' ', '-'), curr_dirs[logic_output]
+        return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output]
 
 
 # function that creates directories if needed
@@ -140,19 +114,16 @@ def update_lower_curr_dir(curr_directory, level):
 
 # function that replaces certain markdown structures with the equivalent used on the website
 def replace_markdown_markers(curr_line, linklist):
-
     # replace links with a reference
     matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line)
     if matches:
         for match in matches:
-            print(f"[{match[0]}]({match[1]})")
             curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + str(len(linklist) + 1) + "]")
             linklist.append(match[1])
 
-    # TODO:
-    # code-blocks
-    # tips
-    # warnings
+    # TODO: code-blocks
+    # TODO: tips
+    # TODO: warnings
     # etc
 
     return curr_line, linklist
@@ -160,7 +131,7 @@ def replace_markdown_markers(curr_line, linklist):
 
 # function that checks for if-statements
 def check_if_statements(curr_line):
-
+    # TODO: adapt regex for annoying inconsistencies
     # check whether the first part of the line contains information wrt if-statements
     match = re.search(r'^\{-if-%-\s([^%]*)%-if-}(.*)', curr_line)
 
@@ -168,9 +139,7 @@ def check_if_statements(curr_line):
     match_large = re.search(r'^(.*)(\{-if-%-\s[^%]*%-if-})(.*)', curr_line)
 
     if match:
-        print("################################################################################")
         content = match.group(1)
-        print(content)
 
         # new if-statement wrt OS
         if re.match(r'if OS == ', content):
@@ -187,7 +156,8 @@ def check_if_statements(curr_line):
         # endif statement wrt OS
         elif re.match(r'endif ', content):
             if str(1) in active_OS_if_states.values():
-                active_OS_if_states[list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active"
+                active_OS_if_states[
+                    list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active"
             else:
                 for key in active_OS_if_states.keys():
                     active_OS_if_states[key] = "inactive"
@@ -210,25 +180,14 @@ def check_if_statements(curr_line):
                 position = list(active_OS_if_states.values()).index("inactive")
                 active_OS_if_states[key_list[position]] = "active"
 
-        print(active_OS_if_states)
-
         if len(match.group(2)) != 0:
             extra_message = match.group(2).lstrip()
-            print(extra_message)
-            # check_if_statements(extra_message)
-            print("check_extra_message")
             return "check_extra_message", extra_message, None
 
         else:
-            print("done")
             return "done", None, None
 
     elif match_large:
-        print("################################################################################")
-        print(active_OS_if_states)
-        print(match_large.group(1))
-        print(match_large.group(2))
-        print("write_text_and_check_extra_message")
         return "write_text_and_check_extra_message", match_large.group(2), match_large.group(1)
 
     else:
@@ -253,7 +212,8 @@ def write_text_to_file(file_name, curr_line):
 # function that decides what file to write text to
 def choose_and_write_to_file(curr_line):
     # check that the line is part of the website for gent
-    if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive":
+    if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and \
+            active_OS_if_states["macos"] == "inactive":
         write_text_to_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", curr_line)
     if active_OS_if_states["linux"] == "active":
         write_text_to_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", curr_line)
@@ -284,58 +244,116 @@ def write_end_of_file(file_location, OS, linklist):
     add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + main_title + "/#" + last_title.lower())
 
 
-################### actually parse the md file ###################
-
-# remove the old directories if needed
-remove_directory_tree(root_dir_generic)
-remove_directory_tree(root_dir_os_specific_linux)
-remove_directory_tree(root_dir_os_specific_windows)
-remove_directory_tree(root_dir_os_specific_macos)
-
-# create directories for the source markdown file
-create_directory(root_dir_generic)
-create_directory(".\\parsed_mds\\os_specific")
-create_directory(root_dir_os_specific_linux)
-create_directory(root_dir_os_specific_windows)
-create_directory(root_dir_os_specific_macos)
-create_directory(root_dir_generic + curr_dirs[0])
-create_directory(root_dir_os_specific_linux + curr_dirs[0])
-create_directory(root_dir_os_specific_windows + curr_dirs[0])
-create_directory(root_dir_os_specific_macos + curr_dirs[0])
-
-# process the jinja macros
-jinja_parser(filename)
-
-# open the file and store line by line in the right file
-with open(".\\copies\\" + filename, 'r') as readfile:
-
-    for line in readfile:
-        title_level, title, directory = check_for_title(line)
-
-        # line is a title with a maximum depth of 3
-        if title_level > 0:
-            last_title_level = title_level
-            last_title = title
-            last_directory = directory
-            after_first_title = True
-
-        # line is not a title
-        elif after_first_title:
-            # check for if-statements and write the appropriate lines in the right files
-            next_action = check_if_statements(line)
-            while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message":
-                if next_action[0] == "write_text_and_check_extra_message":
-                    choose_and_write_to_file(next_action[2])
-                next_action = check_if_statements(next_action[1])
-
-            if next_action[0] == "write_text":
-                choose_and_write_to_file(next_action[2])
-
-# write end of file for the last file
-write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic)
-write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux)
-write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", links_windows)
-write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos)
-
+# function that makes sure all titles can be used as valid filenames
+def make_valid_title(s):
+    # Define a regex pattern for invalid characters on both Windows and Linux
+    invalid_chars = r'[<>:"/\\|?*\0()]'
+
+    # Remove invalid characters
+    valid_filename = re.sub(invalid_chars, '', s)
+
+    # Strip leading/trailing whitespace
+    valid_filename = valid_filename.strip()
+
+    return valid_filename
+
+
+for filename in filenames.keys():
+    try:
+        # make a copy of one of the md files to test some things
+        shutil.copyfile(filenames[filename],
+                        ".\\copies\\" + filename)
+
+        ################### define/reset loop specific variables ###################
+
+        # variable for the main title (needed for reference links)
+        main_title = filename[:-3]
+
+        # variable that keeps track of the directories that are used to write in at different levels
+        curr_dirs = [filename[:-3] for i in range(4)]
+
+        # variable to keep track whether we're dealing with OS-specific info or not
+        OS_specific = False
+
+        # variable that keeps track of the latest non-zero level title and corresponding directory
+        last_title_level = 1
+        last_title = None
+        last_directory = None
+        last_was_title = False
+
+        # list to keep track of links in the text
+        links_generic = []
+        links_linux = []
+        links_windows = []
+        links_macos = []
+
+        # dictionaries to keep track of current OS
+        active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"}
+
+        # variable that shows whether the first title has been reached yet
+        after_first_title = False
+
+        ################### actually parse the md file ###################
+
+        # remove the old directories if needed
+        remove_directory_tree(root_dir_generic)
+        remove_directory_tree(root_dir_os_specific_linux)
+        remove_directory_tree(root_dir_os_specific_windows)
+        remove_directory_tree(root_dir_os_specific_macos)
+
+        # create directories for the source markdown file
+        create_directory(root_dir_generic)
+        create_directory(".\\parsed_mds\\os_specific")
+        create_directory(root_dir_os_specific_linux)
+        create_directory(root_dir_os_specific_windows)
+        create_directory(root_dir_os_specific_macos)
+        create_directory(root_dir_generic + curr_dirs[0])
+        create_directory(root_dir_os_specific_linux + curr_dirs[0])
+        create_directory(root_dir_os_specific_windows + curr_dirs[0])
+        create_directory(root_dir_os_specific_macos + curr_dirs[0])
+
+        # process the jinja macros
+        jinja_parser(filename)
+
+        # open the file and store line by line in the right file
+        with open(".\\copies\\" + filename, 'r') as readfile:
+
+            for line in readfile:
+                title_level, title, directory = check_for_title(line)
+
+                # line is a title with a maximum depth of 3
+                if title_level > 0:
+                    last_title_level = title_level
+                    last_title = title
+                    last_directory = directory
+                    after_first_title = True
+
+                # line is not a title
+                elif after_first_title:
+                    # check for if-statements and write the appropriate lines in the right files
+                    next_action = check_if_statements(line)
+                    while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message":
+                        if next_action[0] == "write_text_and_check_extra_message":
+                            choose_and_write_to_file(next_action[2])
+                        next_action = check_if_statements(next_action[1])
+
+                    if next_action[0] == "write_text":
+                        choose_and_write_to_file(next_action[2])
+
+        # write end of file for the last file
+        write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic)
+        write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux",
+                          links_linux)
+        write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows",
+                          links_windows)
+        write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS",
+                          links_macos)
+        print("Parsing succeeded for file: " + filename)
+        succeeded += 1
+    except:
+        print("Parsing failed for file: " + filename)
+        failed += 1
+
+print("Success ratio: " + str(succeeded/(succeeded + failed) * 100) + "%")
 
 # TODO: directory cleanup

From 2116d6e2412e56c48af0e2f032535f687836859c Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 9 Aug 2024 16:42:44 +0200
Subject: [PATCH 006/152] ignore '#' lines inside code blocks when detecting
 titles; not all files get parsed successfully yet

---
 scripts/HPC chatbot preprocessor/main.py | 38 +++++++++++++++---------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py
index 86bd2ed9c3f..b2900334ef8 100644
--- a/scripts/HPC chatbot preprocessor/main.py	
+++ b/scripts/HPC chatbot preprocessor/main.py	
@@ -17,10 +17,6 @@
 if not os.path.exists(".\\if_mangled_files"):
     os.mkdir(".\\if_mangled_files")
 
-# copy the examples to the right location wrt the script in order to allow jinja to work
-if not os.path.exists(".\\examples"):
-    shutil.copytree("..\\..\\mkdocs\\docs\\HPC\\examples", ".\\examples")
-
 ################### define global variables ###################
 
 # variable that keeps track of the source directories
@@ -40,8 +36,10 @@
     for file in files:
         filenames[file] = os.path.join(source_directory, file)
 
+# TODO: find solution for duplicate filenames between linux tutorial and normal files
 
-# filenames = {'account.md': '..\\..\\mkdocs\\docs\\HPC\\account.md'}
+# TODO: problem-files (other layout than normal markdown-files)
+problem_files = ["linux_tutorial\\getting_started.md", "linux_tutorial\\navigating.md"]
 
 
 ################### define functions ###################
@@ -72,9 +70,9 @@ def reset_link_lists():
 
 # function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables
 def check_for_title(curr_line):
-    global curr_dirs, last_title
+    global curr_dirs, last_title, in_code_block
     logic_output = check_for_title_logic(curr_line)
-    if logic_output == 0:
+    if logic_output == 0 or in_code_block:
         return 0, None, None
     else:
         if last_title is not None:
@@ -99,6 +97,13 @@ def check_for_title(curr_line):
         return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output]
 
 
+# function used to detect codeblocks and make sure the comments don't get detected as titles
+def detect_in_code_block(curr_line):
+    global in_code_block
+    if '```' in curr_line or (('<pre><code>' in curr_line) ^ ('</code></pre>' in curr_line)):
+        in_code_block = not in_code_block
+
+
 # function that creates directories if needed
 def create_directory(new_directory):
     if not os.path.exists(new_directory):
@@ -227,6 +232,7 @@ def choose_and_write_to_file(curr_line):
 def add_reference_link(file_location, reference_link):
     with open(file_location, 'a') as write_file:
         write_file.write("\nreference: " + reference_link + "\n")
+    # TODO: fix trailing spaces in filename
 
 
 # function that adds the links that should be at the end of a file
@@ -260,6 +266,7 @@ def make_valid_title(s):
 
 for filename in filenames.keys():
     try:
+    # if True:
         # make a copy of one of the md files to test some things
         shutil.copyfile(filenames[filename],
                         ".\\copies\\" + filename)
@@ -293,13 +300,10 @@ def make_valid_title(s):
         # variable that shows whether the first title has been reached yet
         after_first_title = False
 
-        ################### actually parse the md file ###################
+        # variable that is used to be sure that we are detecting titles and not comments from codeblocks
+        in_code_block = False
 
-        # remove the old directories if needed
-        remove_directory_tree(root_dir_generic)
-        remove_directory_tree(root_dir_os_specific_linux)
-        remove_directory_tree(root_dir_os_specific_windows)
-        remove_directory_tree(root_dir_os_specific_macos)
+        ################### actually parse the md file ###################
 
         # create directories for the source markdown file
         create_directory(root_dir_generic)
@@ -321,6 +325,8 @@ def make_valid_title(s):
             for line in readfile:
                 title_level, title, directory = check_for_title(line)
 
+                detect_in_code_block(line)
+
                 # line is a title with a maximum depth of 3
                 if title_level > 0:
                     last_title_level = title_level
@@ -341,6 +347,9 @@ def make_valid_title(s):
                         choose_and_write_to_file(next_action[2])
 
         # write end of file for the last file
+        # print(root_dir_generic)
+        # print(last_directory)
+        # print(filename)
         write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic)
         write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux",
                           links_linux)
@@ -348,12 +357,13 @@ def make_valid_title(s):
                           links_windows)
         write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS",
                           links_macos)
-        print("Parsing succeeded for file: " + filename)
         succeeded += 1
     except:
         print("Parsing failed for file: " + filename)
         failed += 1
 
 print("Success ratio: " + str(succeeded/(succeeded + failed) * 100) + "%")
+print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.")
 
 # TODO: directory cleanup
+# TODO: reconsider maximum depth to be detected as title

From 159aa62af18dd76b5567c00a98a08c16081d9773 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 9 Aug 2024 16:43:09 +0200
Subject: [PATCH 007/152] start_checker: also count pre-title lines in linux-tutorial

---
 .../HPC chatbot preprocessor/start_checker.py    | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/scripts/HPC chatbot preprocessor/start_checker.py b/scripts/HPC chatbot preprocessor/start_checker.py
index 5661c79ddc9..b328e7ab80c 100644
--- a/scripts/HPC chatbot preprocessor/start_checker.py	
+++ b/scripts/HPC chatbot preprocessor/start_checker.py	
@@ -17,3 +17,19 @@
                     lines_until_title += 1
             print(filename + " : " + str(lines_until_title))
     break
+
+directory = "C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC\\linux-tutorial"
+
+for dirpath, dirnames, filenames in os.walk(directory):
+    for filename in filenames:
+        # if filename.endswith("xdmod.md"):
+        #     break
+        if filename.endswith(".md"):
+            lines_until_title = 0
+            with open(directory + "\\" + filename, "r") as file:
+                for line in file:
+                    if line[0] == "#":
+                        break
+                    lines_until_title += 1
+            print(filename + " : " + str(lines_until_title))
+    break

From 75765e555edb9bc67ebcaf0136ac5efc8d0461ad Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 9 Aug 2024 16:43:47 +0200
Subject: [PATCH 008/152] jinja_parser: load templates via Environment and ChoiceLoader

---
 scripts/HPC chatbot preprocessor/jinja_parser.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/HPC chatbot preprocessor/jinja_parser.py b/scripts/HPC chatbot preprocessor/jinja_parser.py
index 1b033bfdc6c..612c20dd06b 100644
--- a/scripts/HPC chatbot preprocessor/jinja_parser.py	
+++ b/scripts/HPC chatbot preprocessor/jinja_parser.py	
@@ -1,5 +1,5 @@
 import yaml
-from jinja2 import Template
+from jinja2 import Template, FileSystemLoader, Environment, ChoiceLoader
 from if_mangler import mangle_ifs
 
 
@@ -17,7 +17,9 @@ def jinja_parser(filename):
         md_content = md_file.read()
 
     # Use Jinja2 to replace the macros
-    template = Template(md_content)
+    templateloader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")])
+    templateEnv = Environment(loader=templateloader)
+    template = templateEnv.get_template(filename)
     rendered_content = template.render(words_dict)
 
     # Save the rendered content to a new file

From 57d9cfe5f25c66f8a6c2721fca9eaac1e6eea25d Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 9 Aug 2024 16:44:18 +0200
Subject: [PATCH 009/152] change to accommodate more nested if-clauses

---
 .../HPC chatbot preprocessor/if_mangler.py    | 25 ++++++++++++++-----
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/scripts/HPC chatbot preprocessor/if_mangler.py b/scripts/HPC chatbot preprocessor/if_mangler.py
index 9980a2e83e8..8dd0d099452 100644
--- a/scripts/HPC chatbot preprocessor/if_mangler.py	
+++ b/scripts/HPC chatbot preprocessor/if_mangler.py	
@@ -1,7 +1,7 @@
 import re
 
 # global variable to keep track of latest if-statement scope
-is_os = 0 # Can be 0, 1 or 2 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if}
+is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if}
 
 
 def mangle_os_ifs(line):
@@ -26,22 +26,35 @@ def mangle_os_ifs(line):
                                                                                                                                                                                                                      constr_match.end() + start_index + added_length - 1:]
                 added_length += 8
                 is_os = 0
-            elif is_os == 1:
-                is_os = 2
-        elif if_match:
-            if if_os_match:
+            if is_os == 3:
                 line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
                                                                                                constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
                                                                                                                                                                                                                      constr_match.end() + start_index + added_length - 1:]
                 added_length += 8
                 is_os = 2
+            elif is_os == 1:
+                is_os = 2
+        elif if_match:
+            if if_os_match:
+                if is_os == 2:
+                    line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
+                                                                                                   constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
+                                                                                                                                                                                                                         constr_match.end() + start_index + added_length - 1:]
+                    added_length += 8
+                    is_os = 3
+                else:
+                    line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
+                                                                                                   constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
+                                                                                                                                                                                                                         constr_match.end() + start_index + added_length - 1:]
+                    added_length += 8
+                    is_os = 2
             else:
                 if is_os == 2:
                     is_os = 1
                 else:
                     is_os = 0
         else:
-            if is_os == 2:
+            if is_os == 2 or is_os == 3:
                 line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
                                                                                                constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
                                                                                                                                                                                                                      constr_match.end() + start_index + added_length - 1:]

From 75d345b1bd41325c5a4242251c093c6b396d3e21 Mon Sep 17 00:00:00 2001
From: EwDa291 <100782488+EwDa291@users.noreply.github.com>
Date: Fri, 9 Aug 2024 16:45:53 +0200
Subject: [PATCH 010/152] Delete scripts/HPC chatbot
 preprocessor/start_checker.py

This file was only used to test some things locally and is not part of the parser.
---
 .../HPC chatbot preprocessor/start_checker.py | 35 -------------------
 1 file changed, 35 deletions(-)
 delete mode 100644 scripts/HPC chatbot preprocessor/start_checker.py

diff --git a/scripts/HPC chatbot preprocessor/start_checker.py b/scripts/HPC chatbot preprocessor/start_checker.py
deleted file mode 100644
index b328e7ab80c..00000000000
--- a/scripts/HPC chatbot preprocessor/start_checker.py	
+++ /dev/null
@@ -1,35 +0,0 @@
-# THIS IS NOT AN IMPORTANT FILE, DON'T WORRY ABOUT IT, I JUST USED IT TO TEST SOME THINGS
-
-import os
-
-directory = "C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC"
-
-for dirpath, dirnames, filenames in os.walk(directory):
-    for filename in filenames:
-        # if filename.endswith("xdmod.md"):
-        #     break
-        if filename.endswith(".md"):
-            lines_until_title = 0
-            with open(directory + "\\" + filename, "r") as file:
-                for line in file:
-                    if line[0] == "#":
-                        break
-                    lines_until_title += 1
-            print(filename + " : " + str(lines_until_title))
-    break
-
-directory = "C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC\\linux-tutorial"
-
-for dirpath, dirnames, filenames in os.walk(directory):
-    for filename in filenames:
-        # if filename.endswith("xdmod.md"):
-        #     break
-        if filename.endswith(".md"):
-            lines_until_title = 0
-            with open(directory + "\\" + filename, "r") as file:
-                for line in file:
-                    if line[0] == "#":
-                        break
-                    lines_until_title += 1
-            print(filename + " : " + str(lines_until_title))
-    break

From ff7a9fc381399402c36670ef6ddb5bfb245b1dd4 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 12 Aug 2024 11:24:46 +0200
Subject: [PATCH 011/152] make sure files with duplicate names between normal
 files and linux-tutorial are both read and saved properly

---
 .../HPC chatbot preprocessor/if_mangler.py    |   2 +-
 .../HPC chatbot preprocessor/jinja_parser.py  |  24 +-
 scripts/HPC chatbot preprocessor/main.py      | 246 ++++++++++--------
 3 files changed, 149 insertions(+), 123 deletions(-)

diff --git a/scripts/HPC chatbot preprocessor/if_mangler.py b/scripts/HPC chatbot preprocessor/if_mangler.py
index 8dd0d099452..46b121610c9 100644
--- a/scripts/HPC chatbot preprocessor/if_mangler.py	
+++ b/scripts/HPC chatbot preprocessor/if_mangler.py	
@@ -66,7 +66,7 @@ def mangle_os_ifs(line):
 
 def mangle_ifs(directory, file):
     with open(".\\if_mangled_files\\" + file, 'w') as write_file:
-        with open(directory + "\\" + file, 'r') as read_file:
+        with open(directory, 'r') as read_file:
             for line in read_file:
                 new_line = mangle_os_ifs(line)
                 write_file.write(new_line)
diff --git a/scripts/HPC chatbot preprocessor/jinja_parser.py b/scripts/HPC chatbot preprocessor/jinja_parser.py
index 612c20dd06b..603a453ecf7 100644
--- a/scripts/HPC chatbot preprocessor/jinja_parser.py	
+++ b/scripts/HPC chatbot preprocessor/jinja_parser.py	
@@ -4,24 +4,28 @@
 
 
 # function that let's jinja do its thing to format the files expect for the os-related if-statements
-def jinja_parser(filename):
+def jinja_parser(filename, copy_location):
     # Read the YAML file
     with open('..\\..\\mkdocs\\extra\\gent.yml', 'r') as yml_file:
         words_dict = yaml.safe_load(yml_file)
 
-    # Mangle the OS-related if-statements
-    mangle_ifs('.\\copies', filename)
+    # ugly fix for index.md error
+    additional_context = {
+        'config': {
+            'repo_url': 'https://github.com/hpcugent/vsc_user_docs'
+        }
+    }
+    combined_context = {**words_dict, **additional_context}
 
-    # Read the if-mangled Markdown file
-    with open('.\\if_mangled_files\\' + filename, 'r') as md_file:
-        md_content = md_file.read()
+    # Mangle the OS-related if-statements
+    mangle_ifs(copy_location, filename)
 
     # Use Jinja2 to replace the macros
-    templateloader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")])
-    templateEnv = Environment(loader=templateloader)
+    template_loader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")])
+    templateEnv = Environment(loader=template_loader)
     template = templateEnv.get_template(filename)
-    rendered_content = template.render(words_dict)
+    rendered_content = template.render(combined_context)
 
     # Save the rendered content to a new file
-    with open('.\\copies\\' + filename, 'w') as output_file:
+    with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file:
         output_file.write(rendered_content)
diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py
index b2900334ef8..8351979a865 100644
--- a/scripts/HPC chatbot preprocessor/main.py	
+++ b/scripts/HPC chatbot preprocessor/main.py	
@@ -7,34 +7,22 @@
 succeeded = 0
 failed = 0
 
-# make the necessary directories
-if not os.path.exists(".\\copies"):
-    os.mkdir(".\\copies")
-
-if not os.path.exists(".\\parsed_mds"):
-    os.mkdir(".\\parsed_mds")
-
-if not os.path.exists(".\\if_mangled_files"):
-    os.mkdir(".\\if_mangled_files")
-
 ################### define global variables ###################
 
 # variable that keeps track of the source directories
 source_directories = ["..\\..\\mkdocs\\docs\\HPC\\", "..\\..\\mkdocs\\docs\\HPC\\linux-tutorial"]
 
-# variable that keeps track of the directories that are used to write in at different levels
-root_dir_generic = ".\\parsed_mds\\generic\\"
-root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\"
-root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\"
-root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\"
-
 # list of all the filenames
-filenames = {}
+filenames_generic = {}
+filenames_linux = {}
 for source_directory in source_directories:
     all_items = os.listdir(source_directory)
     files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
     for file in files:
-        filenames[file] = os.path.join(source_directory, file)
+        if "linux-tutorial" in source_directory:
+            filenames_linux[file] = os.path.join(source_directory, file)
+        else:
+            filenames_generic[file] = os.path.join(source_directory, file)
 
 # TODO: find solution for duplicate filenames between linux tutorial and normal files
 
@@ -147,7 +135,7 @@ def check_if_statements(curr_line):
         content = match.group(1)
 
         # new if-statement wrt OS
-        if re.match(r'if OS == ', content):
+        if re.search(r'if OS == ', content):
             OS = content[9:-1]
 
             # set new active OS
@@ -159,7 +147,7 @@ def check_if_statements(curr_line):
                     active_OS_if_states[other_OS] = "inactive"
 
         # endif statement wrt OS
-        elif re.match(r'endif ', content):
+        elif re.search(r'endif ', content):
             if str(1) in active_OS_if_states.values():
                 active_OS_if_states[
                     list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active"
@@ -168,7 +156,7 @@ def check_if_statements(curr_line):
                     active_OS_if_states[key] = "inactive"
 
         # else statement wrt OS
-        elif re.match(r'else ', content):
+        elif re.search(r'else ', content):
 
             i = 0
             for i in range(3):
@@ -264,106 +252,140 @@ def make_valid_title(s):
     return valid_filename
 
 
-for filename in filenames.keys():
-    try:
-    # if True:
-        # make a copy of one of the md files to test some things
-        shutil.copyfile(filenames[filename],
-                        ".\\copies\\" + filename)
-
-        ################### define/reset loop specific variables ###################
-
-        # variable for the main title (needed for reference links)
-        main_title = filename[:-3]
-
-        # variable that keeps track of the directories that are used to write in at different levels
-        curr_dirs = [filename[:-3] for i in range(4)]
-
-        # variable to keep track whether we're dealing with OS-specific info or not
-        OS_specific = False
-
-        # variable that keeps track of the latest non-zero level title and corresponding directory
-        last_title_level = 1
-        last_title = None
-        last_directory = None
-        last_was_title = False
-
-        # list to keep track of links in the text
-        links_generic = []
-        links_linux = []
-        links_windows = []
-        links_macos = []
-
-        # dictionaries to keep track of current OS
-        active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"}
+# remove the directories from a previous run of the parser
+remove_directory_tree(".\\parsed_mds")
+remove_directory_tree(".\\copies")
+remove_directory_tree(".\\if_mangled_files")
 
-        # variable that shows whether the first title has been reached yet
-        after_first_title = False
-
-        # variable that is used to be sure that we are detecting titles and not comments from codeblocks
-        in_code_block = False
+# make the necessary directories
+if not os.path.exists(".\\copies"):
+    os.mkdir(".\\copies")
 
-        ################### actually parse the md file ###################
+if not os.path.exists(".\\copies\\linux"):
+    os.mkdir(".\\copies\\linux")
 
-        # create directories for the source markdown file
-        create_directory(root_dir_generic)
-        create_directory(".\\parsed_mds\\os_specific")
-        create_directory(root_dir_os_specific_linux)
-        create_directory(root_dir_os_specific_windows)
-        create_directory(root_dir_os_specific_macos)
-        create_directory(root_dir_generic + curr_dirs[0])
-        create_directory(root_dir_os_specific_linux + curr_dirs[0])
-        create_directory(root_dir_os_specific_windows + curr_dirs[0])
-        create_directory(root_dir_os_specific_macos + curr_dirs[0])
+if not os.path.exists(".\\parsed_mds"):
+    os.mkdir(".\\parsed_mds")
 
-        # process the jinja macros
-        jinja_parser(filename)
+if not os.path.exists(".\\if_mangled_files"):
+    os.mkdir(".\\if_mangled_files")
 
-        # open the file and store line by line in the right file
-        with open(".\\copies\\" + filename, 'r') as readfile:
+for filenames in [filenames_generic, filenames_linux]:
+    for filename in filenames.keys():
+        try:
+        # if True:
+            # make a copy of one of the md files to test some things
+            if "linux-tutorial" in filenames[filename]:
+                copy_file = ".\\copies\\linux\\" + filename
+            else:
+                copy_file = ".\\copies\\" + filename
+            shutil.copyfile(filenames[filename], copy_file)
 
-            for line in readfile:
-                title_level, title, directory = check_for_title(line)
+            ################### define/reset loop specific variables ###################
 
-                detect_in_code_block(line)
+            # variable that keeps track of the directories that are used to write in at different levels
+            if "linux-tutorial" in filenames[filename]:
+                root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\"
+                root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\"
+                root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\"
+                root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\"
+            else:
+                root_dir_generic = ".\\parsed_mds\\generic\\"
+                root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\"
+                root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\"
+                root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\"
+
+            # variable for the main title (needed for reference links)
+            main_title = filename[:-3]
+
+            # variable that keeps track of the directories that are used to write in at different levels
+            curr_dirs = [filename[:-3] for i in range(4)]
+
+            # variable to keep track whether we're dealing with OS-specific info or not
+            OS_specific = False
+
+            # variable that keeps track of the latest non-zero level title and corresponding directory
+            last_title_level = 1
+            last_title = None
+            last_directory = None
+            last_was_title = False
+
+            # list to keep track of links in the text
+            links_generic = []
+            links_linux = []
+            links_windows = []
+            links_macos = []
+
+            # dictionaries to keep track of current OS
+            active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"}
+
+            # variable that shows whether the first title has been reached yet
+            after_first_title = False
+
+            # variable that is used to be sure that we are detecting titles and not comments from codeblocks
+            in_code_block = False
+
+            ################### actually parse the md file ###################
+
+            # create directories for the source markdown file
+            create_directory(root_dir_generic)
+            create_directory(".\\parsed_mds\\os_specific")
+            create_directory(root_dir_os_specific_linux)
+            create_directory(root_dir_os_specific_windows)
+            create_directory(root_dir_os_specific_macos)
+            create_directory(root_dir_generic + curr_dirs[0])
+            create_directory(root_dir_os_specific_linux + curr_dirs[0])
+            create_directory(root_dir_os_specific_windows + curr_dirs[0])
+            create_directory(root_dir_os_specific_macos + curr_dirs[0])
+
+            # process the jinja macros
+            jinja_parser(filename, copy_file)
+
+            # open the file and store line by line in the right file
+            with open(copy_file, 'r') as readfile:
+
+                for line in readfile:
+                    title_level, title, directory = check_for_title(line)
+
+                    detect_in_code_block(line)
+
+                    # line is a title with a maximum depth of 3
+                    if title_level > 0:
+                        last_title_level = title_level
+                        last_title = title
+                        last_directory = directory
+                        after_first_title = True
+
+                    # line is not a title
+                    elif after_first_title:
+                        # check for if-statements and write the appropriate lines in the right files
+                        next_action = check_if_statements(line)
+                        while next_action[0] == "write_text_and_check_extra_message" or next_action[
+                            0] == "check_extra_message":
+                            if next_action[0] == "write_text_and_check_extra_message":
+                                choose_and_write_to_file(next_action[2])
+                            next_action = check_if_statements(next_action[1])
+
+                        if next_action[0] == "write_text":
+                            choose_and_write_to_file(next_action[2])
 
-                # line is a title with a maximum depth of 3
-                if title_level > 0:
-                    last_title_level = title_level
-                    last_title = title
-                    last_directory = directory
-                    after_first_title = True
+            # write end of file for the last file
+            write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic)
+            write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux",
+                              links_linux)
+            write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows",
+                              links_windows)
+            write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS",
+                              links_macos)
+            succeeded += 1
+        except:
+            print("Parsing failed for file: " + filename)
+            failed += 1
 
-                # line is not a title
-                elif after_first_title:
-                    # check for if-statements and write the appropriate lines in the right files
-                    next_action = check_if_statements(line)
-                    while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message":
-                        if next_action[0] == "write_text_and_check_extra_message":
-                            choose_and_write_to_file(next_action[2])
-                        next_action = check_if_statements(next_action[1])
-
-                    if next_action[0] == "write_text":
-                        choose_and_write_to_file(next_action[2])
-
-        # write end of file for the last file
-        # print(root_dir_generic)
-        # print(last_directory)
-        # print(filename)
-        write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic)
-        write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux",
-                          links_linux)
-        write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows",
-                          links_windows)
-        write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS",
-                          links_macos)
-        succeeded += 1
-    except:
-        print("Parsing failed for file: " + filename)
-        failed += 1
-
-print("Success ratio: " + str(succeeded/(succeeded + failed) * 100) + "%")
-print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.")
+print("Success ratio: " + str(succeeded / (succeeded + failed) * 100) + "%")
+print(
+    "Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.")
 
 # TODO: directory cleanup
 # TODO: reconsider maximum depth to be detected as title
+# TODO: adapt script to be used from command line

From 7d279d6a7f1992275eae487c0893befc4a48d6f9 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 12 Aug 2024 11:54:48 +0200
Subject: [PATCH 012/152] fixed the problem of some files being written in reST
 instead of markdown

---
 scripts/HPC chatbot preprocessor/main.py | 25 +++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py
index 8351979a865..12f222b82f0 100644
--- a/scripts/HPC chatbot preprocessor/main.py	
+++ b/scripts/HPC chatbot preprocessor/main.py	
@@ -1,6 +1,8 @@
 import os
 import re
 import shutil
+import pypandoc
+
 from jinja_parser import jinja_parser
 
 # variables for analytics
@@ -24,10 +26,8 @@
         else:
             filenames_generic[file] = os.path.join(source_directory, file)
 
-# TODO: find solution for duplicate filenames between linux tutorial and normal files
-
-# TODO: problem-files (other layout than normal markdown-files)
-problem_files = ["linux_tutorial\\getting_started.md", "linux_tutorial\\navigating.md"]
+# some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc
+problem_files = ["getting_started.md", "navigating.md"]
 
 
 ################### define functions ###################
@@ -272,8 +272,8 @@ def make_valid_title(s):
 
 for filenames in [filenames_generic, filenames_linux]:
     for filename in filenames.keys():
-        try:
-        # if True:
+        # try:
+        if True:
             # make a copy of one of the md files to test some things
             if "linux-tutorial" in filenames[filename]:
                 copy_file = ".\\copies\\linux\\" + filename
@@ -341,6 +341,10 @@ def make_valid_title(s):
             # process the jinja macros
             jinja_parser(filename, copy_file)
 
+            # convert the files without proper markdown layout into markdown using pandoc
+            if "linux-tutorial" in filenames[filename] and filename in problem_files:
+                pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file)
+
             # open the file and store line by line in the right file
             with open(copy_file, 'r') as readfile:
 
@@ -378,13 +382,12 @@ def make_valid_title(s):
             write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS",
                               links_macos)
             succeeded += 1
-        except:
-            print("Parsing failed for file: " + filename)
-            failed += 1
+        # except:
+        #     print("Parsing failed for file: " + filename)
+        #     failed += 1
 
 print("Success ratio: " + str(succeeded / (succeeded + failed) * 100) + "%")
-print(
-    "Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.")
+print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.")
 
 # TODO: directory cleanup
 # TODO: reconsider maximum depth to be detected as title

From 8047572387eb08e278bde89f9d688b74c817b7d0 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 12 Aug 2024 13:33:14 +0200
Subject: [PATCH 013/152] some small fixes

---
 scripts/HPC chatbot preprocessor/main.py | 78 +++++++++++++-----------
 1 file changed, 43 insertions(+), 35 deletions(-)

diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py
index 12f222b82f0..bf16c95ea35 100644
--- a/scripts/HPC chatbot preprocessor/main.py	
+++ b/scripts/HPC chatbot preprocessor/main.py	
@@ -37,11 +37,11 @@ def remove_directory_tree(old_directory):
         shutil.rmtree(old_directory)
 
 
-# function that checks whether the current line has a title of level 3 at maximum (returns the level of the title or 0 if the line is not a title)
+# function that checks whether the current line has a title of level 4 at maximum (returns the level of the title or 0 if the line is not a title)
 def check_for_title_logic(curr_line):
     global curr_dirs
     match = re.match(r'^#+ ', curr_line)
-    if match and len(match.group(0)) <= 4:
+    if match and len(match.group(0)) <= 5:
         return len(match.group(0)) - 1
     else:
         return 0
@@ -64,13 +64,13 @@ def check_for_title(curr_line):
         return 0, None, None
     else:
         if last_title is not None:
-            write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic)
+            write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial)
             write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux",
-                              links_linux)
+                              links_linux, is_linux_tutorial)
             write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows",
-                              links_windows)
+                              links_windows, is_linux_tutorial)
             write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS",
-                              links_macos)
+                              links_macos, is_linux_tutorial)
             reset_link_lists()
 
         curr_dirs[logic_output] = curr_dirs[logic_output - 1] + "\\" + make_valid_title(
@@ -124,12 +124,11 @@ def replace_markdown_markers(curr_line, linklist):
 
 # function that checks for if-statements
 def check_if_statements(curr_line):
-    # TODO: adapt regex for annoying inconsistencies
     # check whether the first part of the line contains information wrt if-statements
-    match = re.search(r'^\{-if-%-\s([^%]*)%-if-}(.*)', curr_line)
+    match = re.search(r'^\{-if-%([^%]*)%-if-}(.*)', curr_line)
 
     # check whether the line contains information wrt if-statements that is not in its first part
-    match_large = re.search(r'^(.*)(\{-if-%-\s[^%]*%-if-})(.*)', curr_line)
+    match_large = re.search(r'^(.*)(\{-if-%[^%]*%-if-})(.*)', curr_line)
 
     if match:
         content = match.group(1)
@@ -147,7 +146,7 @@ def check_if_statements(curr_line):
                     active_OS_if_states[other_OS] = "inactive"
 
         # endif statement wrt OS
-        elif re.search(r'endif ', content):
+        elif re.search(r'endif', content):
             if str(1) in active_OS_if_states.values():
                 active_OS_if_states[
                     list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active"
@@ -156,7 +155,7 @@ def check_if_statements(curr_line):
                     active_OS_if_states[key] = "inactive"
 
         # else statement wrt OS
-        elif re.search(r'else ', content):
+        elif re.search(r'else', content):
 
             i = 0
             for i in range(3):
@@ -220,11 +219,10 @@ def choose_and_write_to_file(curr_line):
 def add_reference_link(file_location, reference_link):
     with open(file_location, 'a') as write_file:
         write_file.write("\nreference: " + reference_link + "\n")
-    # TODO: fix trailing spaces in filename
 
 
 # function that adds the links that should be at the end of a file
-def write_end_of_file(file_location, OS, linklist):
+def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_):
     if len(OS) > 0:
         OS = OS + "/"
 
@@ -234,8 +232,13 @@ def write_end_of_file(file_location, OS, linklist):
         for i, link in enumerate(linklist):
             write_file.write("[" + str(i + 1) + "]: " + str(link) + "\n")
 
+    if is_linux_tutorial_:
+        linux_part = "linux-tutorial/"
+    else:
+        linux_part = ""
+
     # finally add the reference link
-    add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + main_title + "/#" + last_title.lower())
+    add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-'))
 
 
 # function that makes sure all titles can be used as valid filenames
@@ -243,11 +246,14 @@ def make_valid_title(s):
     # Define a regex pattern for invalid characters on both Windows and Linux
     invalid_chars = r'[<>:"/\\|?*\0()]'
 
+    # get rid of extra information between {} brackets
+    s = re.sub(r'\{.*?}', '', s)
+
     # Remove invalid characters
     valid_filename = re.sub(invalid_chars, '', s)
 
     # Strip leading/trailing whitespace
-    valid_filename = valid_filename.strip()
+    valid_filename = valid_filename.strip().strip('-')
 
     return valid_filename
 
@@ -272,19 +278,21 @@ def make_valid_title(s):
 
 for filenames in [filenames_generic, filenames_linux]:
     for filename in filenames.keys():
-        # try:
-        if True:
-            # make a copy of one of the md files to test some things
-            if "linux-tutorial" in filenames[filename]:
+        try:
+            ################### define/reset loop specific variables ###################
+
+            # variable that keeps track of whether file is part of the linux tutorial
+            is_linux_tutorial = bool("linux-tutorial" in filenames[filename])
+
+            # make a copy of the original file in order to make sure the original does not get altered
+            if is_linux_tutorial:
                 copy_file = ".\\copies\\linux\\" + filename
             else:
                 copy_file = ".\\copies\\" + filename
             shutil.copyfile(filenames[filename], copy_file)
 
-            ################### define/reset loop specific variables ###################
-
             # variable that keeps track of the directories that are used to write in at different levels
-            if "linux-tutorial" in filenames[filename]:
+            if is_linux_tutorial:
                 root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\"
                 root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\"
                 root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\"
@@ -299,7 +307,7 @@ def make_valid_title(s):
             main_title = filename[:-3]
 
             # variable that keeps track of the directories that are used to write in at different levels
-            curr_dirs = [filename[:-3] for i in range(4)]
+            curr_dirs = [filename[:-3] for i in range(5)]
 
             # variable to keep track whether we're dealing with OS-specific info or not
             OS_specific = False
@@ -353,7 +361,7 @@ def make_valid_title(s):
 
                     detect_in_code_block(line)
 
-                    # line is a title with a maximum depth of 3
+                    # line is a title with a maximum depth of 4
                     if title_level > 0:
                         last_title_level = title_level
                         last_title = title
@@ -364,8 +372,7 @@ def make_valid_title(s):
                     elif after_first_title:
                         # check for if-statements and write the appropriate lines in the right files
                         next_action = check_if_statements(line)
-                        while next_action[0] == "write_text_and_check_extra_message" or next_action[
-                            0] == "check_extra_message":
+                        while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message":
                             if next_action[0] == "write_text_and_check_extra_message":
                                 choose_and_write_to_file(next_action[2])
                             next_action = check_if_statements(next_action[1])
@@ -374,21 +381,22 @@ def make_valid_title(s):
                             choose_and_write_to_file(next_action[2])
 
             # write end of file for the last file
-            write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic)
+            write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial)
             write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux",
-                              links_linux)
+                              links_linux, is_linux_tutorial)
             write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows",
-                              links_windows)
+                              links_windows, is_linux_tutorial)
             write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS",
-                              links_macos)
+                              links_macos, is_linux_tutorial)
             succeeded += 1
-        # except:
-        #     print("Parsing failed for file: " + filename)
-        #     failed += 1
+        except:
+            print("Parsing failed for file: " + filename)
+            failed += 1
 
 print("Success ratio: " + str(succeeded / (succeeded + failed) * 100) + "%")
 print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.")
 
-# TODO: directory cleanup
-# TODO: reconsider maximum depth to be detected as title
+remove_directory_tree(".\\copies")
+remove_directory_tree(".\\if_mangled_files")
+# TODO: reconsider maximum depth to be detected as title (now at four)
 # TODO: adapt script to be used from command line

From 7d1c5ed2cfca12d5eb4ecaffa3178e821c63f210 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 13 Aug 2024 10:35:06 +0200
Subject: [PATCH 014/152] remove try-except-structure

---
 scripts/HPC chatbot preprocessor/main.py | 216 +++++++++++------------
 1 file changed, 105 insertions(+), 111 deletions(-)

diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py
index bf16c95ea35..2ed91022b7b 100644
--- a/scripts/HPC chatbot preprocessor/main.py	
+++ b/scripts/HPC chatbot preprocessor/main.py	
@@ -278,122 +278,116 @@ def make_valid_title(s):
 
 for filenames in [filenames_generic, filenames_linux]:
     for filename in filenames.keys():
-        try:
-            ################### define/reset loop specific variables ###################
+        ################### define/reset loop specific variables ###################
 
-            # variable that keeps track of whether file is part of the linux tutorial
-            is_linux_tutorial = bool("linux-tutorial" in filenames[filename])
+        # variable that keeps track of whether file is part of the linux tutorial
+        is_linux_tutorial = bool("linux-tutorial" in filenames[filename])
 
-            # make a copy of the original file in order to make sure the original does not get altered
-            if is_linux_tutorial:
-                copy_file = ".\\copies\\linux\\" + filename
-            else:
-                copy_file = ".\\copies\\" + filename
-            shutil.copyfile(filenames[filename], copy_file)
-
-            # variable that keeps track of the directories that are used to write in at different levels
-            if is_linux_tutorial:
-                root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\"
-                root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\"
-                root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\"
-                root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\"
-            else:
-                root_dir_generic = ".\\parsed_mds\\generic\\"
-                root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\"
-                root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\"
-                root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\"
-
-            # variable for the main title (needed for reference links)
-            main_title = filename[:-3]
-
-            # variable that keeps track of the directories that are used to write in at different levels
-            curr_dirs = [filename[:-3] for i in range(5)]
-
-            # variable to keep track whether we're dealing with OS-specific info or not
-            OS_specific = False
-
-            # variable that keeps track of the latest non-zero level title and corresponding directory
-            last_title_level = 1
-            last_title = None
-            last_directory = None
-            last_was_title = False
-
-            # list to keep track of links in the text
-            links_generic = []
-            links_linux = []
-            links_windows = []
-            links_macos = []
-
-            # dictionaries to keep track of current OS
-            active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"}
-
-            # variable that shows whether the first title has been reached yet
-            after_first_title = False
-
-            # variable that is used to be sure that we are detecting titles and not comments from codeblocks
-            in_code_block = False
-
-            ################### actually parse the md file ###################
-
-            # create directories for the source markdown file
-            create_directory(root_dir_generic)
-            create_directory(".\\parsed_mds\\os_specific")
-            create_directory(root_dir_os_specific_linux)
-            create_directory(root_dir_os_specific_windows)
-            create_directory(root_dir_os_specific_macos)
-            create_directory(root_dir_generic + curr_dirs[0])
-            create_directory(root_dir_os_specific_linux + curr_dirs[0])
-            create_directory(root_dir_os_specific_windows + curr_dirs[0])
-            create_directory(root_dir_os_specific_macos + curr_dirs[0])
-
-            # process the jinja macros
-            jinja_parser(filename, copy_file)
-
-            # convert the files without proper markdown layout into markdown using pandoc
-            if "linux-tutorial" in filenames[filename] and filename in problem_files:
-                pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file)
-
-            # open the file and store line by line in the right file
-            with open(copy_file, 'r') as readfile:
-
-                for line in readfile:
-                    title_level, title, directory = check_for_title(line)
-
-                    detect_in_code_block(line)
-
-                    # line is a title with a maximum depth of 4
-                    if title_level > 0:
-                        last_title_level = title_level
-                        last_title = title
-                        last_directory = directory
-                        after_first_title = True
-
-                    # line is not a title
-                    elif after_first_title:
-                        # check for if-statements and write the appropriate lines in the right files
-                        next_action = check_if_statements(line)
-                        while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message":
-                            if next_action[0] == "write_text_and_check_extra_message":
-                                choose_and_write_to_file(next_action[2])
-                            next_action = check_if_statements(next_action[1])
-
-                        if next_action[0] == "write_text":
+        # make a copy of the original file in order to make sure the original does not get altered
+        if is_linux_tutorial:
+            copy_file = ".\\copies\\linux\\" + filename
+        else:
+            copy_file = ".\\copies\\" + filename
+        shutil.copyfile(filenames[filename], copy_file)
+
+        # variable that keeps track of the directories that are used to write in at different levels
+        if is_linux_tutorial:
+            root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\"
+            root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\"
+            root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\"
+            root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\"
+        else:
+            root_dir_generic = ".\\parsed_mds\\generic\\"
+            root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\"
+            root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\"
+            root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\"
+
+        # variable for the main title (needed for reference links)
+        main_title = filename[:-3]
+
+        # variable that keeps track of the directories that are used to write in at different levels
+        curr_dirs = [filename[:-3] for i in range(5)]
+
+        # variable to keep track whether we're dealing with OS-specific info or not
+        OS_specific = False
+
+        # variable that keeps track of the latest non-zero level title and corresponding directory
+        last_title_level = 1
+        last_title = None
+        last_directory = None
+        last_was_title = False
+
+        # list to keep track of links in the text
+        links_generic = []
+        links_linux = []
+        links_windows = []
+        links_macos = []
+
+        # dictionaries to keep track of current OS
+        active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"}
+
+        # variable that shows whether the first title has been reached yet
+        after_first_title = False
+
+        # variable that is used to be sure that we are detecting titles and not comments from codeblocks
+        in_code_block = False
+
+        ################### actually parse the md file ###################
+
+        # create directories for the source markdown file
+        create_directory(root_dir_generic)
+        create_directory(".\\parsed_mds\\os_specific")
+        create_directory(root_dir_os_specific_linux)
+        create_directory(root_dir_os_specific_windows)
+        create_directory(root_dir_os_specific_macos)
+        create_directory(root_dir_generic + curr_dirs[0])
+        create_directory(root_dir_os_specific_linux + curr_dirs[0])
+        create_directory(root_dir_os_specific_windows + curr_dirs[0])
+        create_directory(root_dir_os_specific_macos + curr_dirs[0])
+
+        # process the jinja macros
+        jinja_parser(filename, copy_file)
+
+        # convert the files without proper markdown layout into markdown using pandoc
+        if "linux-tutorial" in filenames[filename] and filename in problem_files:
+            pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file)
+
+        # open the file and store line by line in the right file
+        with open(copy_file, 'r') as readfile:
+
+            for line in readfile:
+                title_level, title, directory = check_for_title(line)
+
+                detect_in_code_block(line)
+
+                # line is a title with a maximum depth of 4
+                if title_level > 0:
+                    last_title_level = title_level
+                    last_title = title
+                    last_directory = directory
+                    after_first_title = True
+
+                # line is not a title
+                elif after_first_title:
+                    # check for if-statements and write the appropriate lines in the right files
+                    next_action = check_if_statements(line)
+                    while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message":
+                        if next_action[0] == "write_text_and_check_extra_message":
                             choose_and_write_to_file(next_action[2])
+                        next_action = check_if_statements(next_action[1])
 
-            # write end of file for the last file
-            write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial)
-            write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux",
-                              links_linux, is_linux_tutorial)
-            write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows",
-                              links_windows, is_linux_tutorial)
-            write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS",
-                              links_macos, is_linux_tutorial)
-            succeeded += 1
-        except:
-            print("Parsing failed for file: " + filename)
-            failed += 1
+                    if next_action[0] == "write_text":
+                        choose_and_write_to_file(next_action[2])
+
+        # write end of file for the last file
+        write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial)
+        write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux",
+                          links_linux, is_linux_tutorial)
+        write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows",
+                          links_windows, is_linux_tutorial)
+        write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS",
+                          links_macos, is_linux_tutorial)
 
-print("Success ratio: " + str(succeeded / (succeeded + failed) * 100) + "%")
 print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.")
 
 remove_directory_tree(".\\copies")

From 984b0cd3868b38c59e72c56fd75f04c6e4918b18 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 13 Aug 2024 12:23:04 +0200
Subject: [PATCH 015/152] collapse all code into one file

---
 scripts/HPC chatbot preprocessor/main.py | 105 ++++++++++++++++++++++-
 1 file changed, 101 insertions(+), 4 deletions(-)

diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py
index 2ed91022b7b..b6e42e99ea0 100644
--- a/scripts/HPC chatbot preprocessor/main.py	
+++ b/scripts/HPC chatbot preprocessor/main.py	
@@ -2,8 +2,8 @@
 import re
 import shutil
 import pypandoc
-
-from jinja_parser import jinja_parser
+import yaml
+from jinja2 import FileSystemLoader, Environment, ChoiceLoader
 
 # variables for analytics
 succeeded = 0
@@ -29,6 +29,9 @@
 # some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc
 problem_files = ["getting_started.md", "navigating.md"]
 
+# global variable to keep track of latest if-statement scope
+is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if}
+
 
 ################### define functions ###################
 # function that removes the previous file structure before starting the process of making a new one
@@ -122,6 +125,102 @@ def replace_markdown_markers(curr_line, linklist):
     return curr_line, linklist
 
 
+# function that lets jinja do its thing to format the files except for the os-related if-statements
+def jinja_parser(filename, copy_location):
+    # Read the YAML file
+    with open('..\\..\\mkdocs\\extra\\gent.yml', 'r') as yml_file:
+        words_dict = yaml.safe_load(yml_file)
+
+    # ugly fix for index.md error
+    additional_context = {
+        'config': {
+            'repo_url': 'https://github.com/hpcugent/vsc_user_docs'
+        }
+    }
+    combined_context = {**words_dict, **additional_context}
+
+    # Mangle the OS-related if-statements
+    mangle_ifs(copy_location, filename)
+
+    # Use Jinja2 to replace the macros
+    template_loader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")])
+    templateEnv = Environment(loader=template_loader)
+    template = templateEnv.get_template(filename)
+    rendered_content = template.render(combined_context)
+
+    # Save the rendered content to a new file
+    with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file:
+        output_file.write(rendered_content)
+
+
+def mangle_os_ifs(line):
+    global is_os
+
+    match = re.search(r'\{%(.*?)%}(.*)', line)
+
+    start_index = 0
+    added_length = 0
+
+    while match:
+
+        constr_match = re.search(r'\{%.*?%}', match.string)
+        if_match = re.search(r'if ', match.group(1))
+        if_os_match = re.search(r'if OS == ', match.group(1))
+        endif_match = re.search(r'endif', match.group(1))
+
+        if endif_match:
+            if is_os == 2:
+                line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
+                                                                                               constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
+                                                                                                                                                                                                                     constr_match.end() + start_index + added_length - 1:]
+                added_length += 8
+                is_os = 0
+            if is_os == 3:
+                line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
+                                                                                               constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
+                                                                                                                                                                                                                     constr_match.end() + start_index + added_length - 1:]
+                added_length += 8
+                is_os = 2
+            elif is_os == 1:
+                is_os = 2
+        elif if_match:
+            if if_os_match:
+                if is_os == 2:
+                    line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
+                                                                                                   constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
+                                                                                                                                                                                                                         constr_match.end() + start_index + added_length - 1:]
+                    added_length += 8
+                    is_os = 3
+                else:
+                    line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
+                                                                                                   constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
+                                                                                                                                                                                                                         constr_match.end() + start_index + added_length - 1:]
+                    added_length += 8
+                    is_os = 2
+            else:
+                if is_os == 2:
+                    is_os = 1
+                else:
+                    is_os = 0
+        else:
+            if is_os == 2 or is_os == 3:
+                line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
+                                                                                               constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
+                                                                                                                                                                                                                     constr_match.end() + start_index + added_length - 1:]
+                added_length += 8
+        start_index += constr_match.end()
+        match = re.search(r'\{%(.*?)%}(.*)', match.group(2))
+    return line
+
+
+def mangle_ifs(directory, file):
+    with open(".\\if_mangled_files\\" + file, 'w') as write_file:
+        with open(directory, 'r') as read_file:
+            for line in read_file:
+                new_line = mangle_os_ifs(line)
+                write_file.write(new_line)
+
+
 # function that checks for if-statements
 def check_if_statements(curr_line):
     # check whether the first part of the line contains information wrt if-statements
@@ -388,8 +487,6 @@ def make_valid_title(s):
         write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS",
                           links_macos, is_linux_tutorial)
 
-print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.")
-
 remove_directory_tree(".\\copies")
 remove_directory_tree(".\\if_mangled_files")
 # TODO: reconsider maximum depth to be detected as title (now at four)

From 8f5eeaa5454860326bf3a02d15a63c5622ab7aee Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 13 Aug 2024 12:26:02 +0200
Subject: [PATCH 016/152] Rename file

---
 scripts/HPC chatbot preprocessor/{main.py => chatbot_parser.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename scripts/HPC chatbot preprocessor/{main.py => chatbot_parser.py} (100%)

diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/chatbot_parser.py
similarity index 100%
rename from scripts/HPC chatbot preprocessor/main.py
rename to scripts/HPC chatbot preprocessor/chatbot_parser.py

From 2b97b7a31d9ba151f1747a152736dac4906af466 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 13 Aug 2024 12:30:36 +0200
Subject: [PATCH 017/152] cleanup repository

---
 .../HPC chatbot preprocessor/.idea/.gitignore |   8 -
 .../.idea/HPC chatbot preprocessor.iml        |  10 -
 .../inspectionProfiles/Project_Default.xml    |  25 --
 .../inspectionProfiles/profiles_settings.xml  |   6 -
 .../HPC chatbot preprocessor/.idea/misc.xml   |   7 -
 .../.idea/modules.xml                         |   8 -
 .../HPC chatbot preprocessor/.idea/vcs.xml    |   6 -
 .../copies/getting_started_copy.md            | 268 ------------------
 .../Getting-Access/Getting-Access.txt         |  25 --
 .../Getting-Connected/Getting-Connected.txt   |  19 --
 .../Getting-Started/Getting-Started.txt       |  11 -
 .../Inspect-your-results.txt                  |  56 ----
 .../Getting-Started/Next-steps/Next-steps.txt |  15 -
 .../Submitting-a-job/Submitting-a-job.txt     |  60 ----
 .../Transfer-your-files.txt                   |  21 --
 .../Wait-for-job-to-be-executed.txt           |  26 --
 .../Getting-Access/Getting-Access.txt         |   2 -
 .../Getting-Connected/Getting-Connected.txt   |  18 --
 .../Getting-Started/Getting-Started.txt       |   2 -
 .../Inspect-your-results.txt                  |   2 -
 .../Getting-Started/Next-steps/Next-steps.txt |   2 -
 .../Submitting-a-job/Submitting-a-job.txt     |   2 -
 .../Transfer-your-files.txt                   |  21 --
 .../Wait-for-job-to-be-executed.txt           |   2 -
 .../Getting-Access/Getting-Access.txt         |   2 -
 .../Getting-Connected/Getting-Connected.txt   |  13 -
 .../Getting-Started/Getting-Started.txt       |   2 -
 .../Inspect-your-results.txt                  |   2 -
 .../Getting-Started/Next-steps/Next-steps.txt |   2 -
 .../Submitting-a-job/Submitting-a-job.txt     |   2 -
 .../Transfer-your-files.txt                   |  21 --
 .../Wait-for-job-to-be-executed.txt           |   2 -
 .../Getting-Access/Getting-Access.txt         |   2 -
 .../Getting-Connected/Getting-Connected.txt   |  13 -
 .../Getting-Started/Getting-Started.txt       |   2 -
 .../Inspect-your-results.txt                  |   2 -
 .../Getting-Started/Next-steps/Next-steps.txt |   2 -
 .../Submitting-a-job/Submitting-a-job.txt     |   2 -
 .../Transfer-your-files.txt                   |  15 -
 .../Wait-for-job-to-be-executed.txt           |   2 -
 .../HPC chatbot preprocessor/if_mangler.py    |  72 -----
 .../HPC chatbot preprocessor/jinja_parser.py  |  31 --
 42 files changed, 811 deletions(-)
 delete mode 100644 scripts/HPC chatbot preprocessor/.idea/.gitignore
 delete mode 100644 scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml
 delete mode 100644 scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml
 delete mode 100644 scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml
 delete mode 100644 scripts/HPC chatbot preprocessor/.idea/misc.xml
 delete mode 100644 scripts/HPC chatbot preprocessor/.idea/modules.xml
 delete mode 100644 scripts/HPC chatbot preprocessor/.idea/vcs.xml
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/getting_started_copy.md
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt
 delete mode 100644 scripts/HPC chatbot preprocessor/if_mangler.py
 delete mode 100644 scripts/HPC chatbot preprocessor/jinja_parser.py

diff --git a/scripts/HPC chatbot preprocessor/.idea/.gitignore b/scripts/HPC chatbot preprocessor/.idea/.gitignore
deleted file mode 100644
index 13566b81b01..00000000000
--- a/scripts/HPC chatbot preprocessor/.idea/.gitignore	
+++ /dev/null
@@ -1,8 +0,0 @@
-# Default ignored files
-/shelf/
-/workspace.xml
-# Editor-based HTTP Client requests
-/httpRequests/
-# Datasource local storage ignored files
-/dataSources/
-/dataSources.local.xml
diff --git a/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml b/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml
deleted file mode 100644
index 2c80e126949..00000000000
--- a/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml	
+++ /dev/null
@@ -1,10 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module type="PYTHON_MODULE" version="4">
-  <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$">
-      <excludeFolder url="file://$MODULE_DIR$/.venv" />
-    </content>
-    <orderEntry type="inheritedJdk" />
-    <orderEntry type="sourceFolder" forTests="false" />
-  </component>
-</module>
\ No newline at end of file
diff --git a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml
deleted file mode 100644
index fc946d9cefc..00000000000
--- a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml	
+++ /dev/null
@@ -1,25 +0,0 @@
-<component name="InspectionProjectProfileManager">
-  <profile version="1.0">
-    <option name="myName" value="Project Default" />
-    <inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
-      <Languages>
-        <language minSize="205" name="Python" />
-      </Languages>
-    </inspection_tool>
-    <inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
-    <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
-      <option name="ignoredErrors">
-        <list>
-          <option value="E501" />
-        </list>
-      </option>
-    </inspection_tool>
-    <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
-      <option name="ignoredErrors">
-        <list>
-          <option value="N802" />
-        </list>
-      </option>
-    </inspection_tool>
-  </profile>
-</component>
\ No newline at end of file
diff --git a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml
deleted file mode 100644
index 105ce2da2d6..00000000000
--- a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml	
+++ /dev/null
@@ -1,6 +0,0 @@
-<component name="InspectionProjectProfileManager">
-  <settings>
-    <option name="USE_PROJECT_PROFILE" value="false" />
-    <version value="1.0" />
-  </settings>
-</component>
\ No newline at end of file
diff --git a/scripts/HPC chatbot preprocessor/.idea/misc.xml b/scripts/HPC chatbot preprocessor/.idea/misc.xml
deleted file mode 100644
index 54cda8fd6dd..00000000000
--- a/scripts/HPC chatbot preprocessor/.idea/misc.xml	
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="Black">
-    <option name="sdkName" value="Python 3.12 (HPC chatbot preprocessor)" />
-  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (HPC chatbot preprocessor)" project-jdk-type="Python SDK" />
-</project>
\ No newline at end of file
diff --git a/scripts/HPC chatbot preprocessor/.idea/modules.xml b/scripts/HPC chatbot preprocessor/.idea/modules.xml
deleted file mode 100644
index 58e027d745f..00000000000
--- a/scripts/HPC chatbot preprocessor/.idea/modules.xml	
+++ /dev/null
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectModuleManager">
-    <modules>
-      <module fileurl="file://$PROJECT_DIR$/.idea/HPC chatbot preprocessor.iml" filepath="$PROJECT_DIR$/.idea/HPC chatbot preprocessor.iml" />
-    </modules>
-  </component>
-</project>
\ No newline at end of file
diff --git a/scripts/HPC chatbot preprocessor/.idea/vcs.xml b/scripts/HPC chatbot preprocessor/.idea/vcs.xml
deleted file mode 100644
index b2bdec2d71b..00000000000
--- a/scripts/HPC chatbot preprocessor/.idea/vcs.xml	
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
-  </component>
-</project>
\ No newline at end of file
diff --git a/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md b/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md
deleted file mode 100644
index 8fe33ebc513..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md	
+++ /dev/null
@@ -1,268 +0,0 @@
-{% set exampleloc="mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist" %}
-# Getting Started
-
-Welcome to the "Getting Started" guide. This chapter will lead you through the initial steps of logging into the {{hpcinfra}} and submitting your very first job. We'll also walk you through the process step by step using a practical example.
-
-In addition to this chapter, you might find the [recording of the *Introduction to HPC-UGent* training session](https://www.ugent.be/hpc/en/training/introhpcugent-recording) to be a useful resource.
-
-Before proceeding, read [the introduction to HPC](introduction.md) to gain an understanding of the {{ hpcinfra }} and related terminology.
-
-### Getting Access
-
-To get access to the {{hpcinfra}}, visit [Getting an HPC Account](account.md).
-
-If you have not used Linux before, 
-{%- if site == 'Gent' %}
-now would be a good time to follow our [Linux Tutorial](linux-tutorial/index.md).
-{%- else %}
-please learn some basics first before continuing. (see [Appendix C - Useful Linux Commands](useful_linux_commands.md))
-{%- endif %}
-
-#### A typical workflow looks like this:
-
-1.  Connect to the login nodes 
-2.  Transfer your files to the {{hpcinfra}}
-3.  Optional: compile your code and test it 
-4.  Create a job script and submit your job
-5.  Wait for job to be executed
-6.  Study the results generated by your jobs, either on the cluster or
-    after downloading them locally.
-
-We will walk through an illustrative workload to get you started. In this example, our objective is to train a deep learning model for recognizing hand-written digits (MNIST dataset) using [TensorFlow](https://www.tensorflow.org/);
-see the [example scripts](https://github.com/hpcugent/vsc_user_docs/tree/main/{{exampleloc}}).
-
-### Getting Connected
-
-There are two options to connect
-
-- Using a terminal to connect via SSH (for power users) (see [First Time connection to the {{ hpcinfra}}](connecting.md#first-time-connection-to-the-hpc-infrastructure))
-- [Using the web portal](web_portal.md)
-
-Considering your operating system is **{{OS}}**, 
-
-{%- if OS == linux %}
-it is recommended to make use of the `ssh` command in a terminal to get the most flexibility. 
-
-Assuming you have already generated SSH keys in the previous step ([Getting Access](#getting-access)), and that they are in a default location, you should now be able to login by running the following command:
-
-<pre><code>ssh {{userid}}@{{loginnode}}</code></pre>
-
-!!! Warning "User your own VSC account id"
-    
-    Replace <b>{{userid}}</b> with your VSC account id (see <https://account.vscentrum.be>)
-
-!!! Tip
-
-    You can also still use the web portal (see [shell access on web portal](web_portal.md#shell-access))
-
-{%- else %}
-{%- if OS == windows %} it is recommended to use the web portal.
-{%- else %} it should be easy to make use of the `ssh` command in a terminal, but the web portal will work too. {%- endif %}
-
-The [web portal](web_portal.md) offers a convenient way to upload files and gain shell access to the {{hpcinfra}} from a standard web browser (no software installation or configuration required).
-
-See [shell access](web_portal.md#shell-access) when using the web portal, or
-[connection to the {{hpcinfra}}](connecting.md#first-time-connection-to-the-hpc-infrastructure) when using a terminal.
-
-Make sure you can get to a shell access to the {{hpcinfra}} before proceeding with the next steps.
-
-{%- endif %}
-
-!!! Info
-
-    When having problems see the [connection issues section on the troubleshooting page](troubleshooting.md#sec:connecting-issues).
-
-
-### Transfer your files
-
-Now that you can login, it is time to transfer files from your local computer to your **home directory** on the {{hpcinfra}}.
-
-Download [tensorflow_mnist.py](https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/tensorflow_mnist.py) 
-and [run.sh](https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/run.sh) example scripts to your computer (from [here](https://github.com/hpcugent/vsc_user_docs/tree/main/{{exampleloc}})).
-
-{%- if OS == windows %}
-
-The [HPC-UGent web portal](https://login.hpc.ugent.be) provides a file browser that allows uploading files.
-For more information see the [file browser section](web_portal.md#file-browser).
-
-Upload both files (`run.sh` and `tensorflow-mnist.py`) to your **home directory** and go back to your shell.
-
-!!! Info
-
-    As an alternative, you can use WinSCP (see [our section](connecting.md#winscp))
-
-{%- else %}
-
-On your local machine you can run:
-<pre><code>curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/tensorflow_mnist.py
-curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/run.sh
-</code></pre>
-
-Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC).
-<pre><code>scp tensorflow_mnist.py run.sh {{userid}}{{ loginnode }}:~ </code></pre>
-<pre><code>ssh  {{userid}}@{{ loginnode }} </code></pre>
-
-!!! Warning "User your own VSC account id"
-    
-    Replace <b>{{userid}}</b> with your VSC account id (see <https://account.vscentrum.be>)
-
-!!! Info
-
-    For more information about transfering files or `scp`, see [tranfer files from/to hpc](connecting.md#transfer-files-tofrom-the-hpc).
-
-{%- endif %}
-
-When running `ls` in your session on the {{hpcinfra}}, you should see the two files listed in your home directory (`~`):
-
-```shell
-$ ls ~
-run.sh tensorflow_mnist.py
-```
-
-When you do not see these files, make sure you uploaded the files to your **home directory**.
-
-### Submitting a job
-
-Jobs are submitted and executed using job scripts. In our case **run.sh** can be used as a (very minimal) job script.
-
-A job script is a shell script, a text file that specifies the resources, 
-the software that is used (via `module load` statements), 
-and the steps that should be executed to run the calculation.
-
-Our job script looks like this:
-
-<center>-- run.sh --</center>
-
-```bash
-#!/bin/bash
-
-module load TensorFlow/2.11.0-foss-2022a
-
-python tensorflow_mnist.py
-
-```
-<sub>As you can see this job script will run the Python script named **tensorflow_mnist.py**.</sub>
-
-
-The jobs you submit are per default executed on **cluser/{{defaultcluster}}**, you can swap to another cluster by issuing the following command.
-
-```shell
-module swap cluster/{{othercluster}}
-```
-
-!!! Tip
-    
-    When submitting jobs with limited amount of resources, it is recommended to use the [debug/interactive cluster](interactive_debug.md#interactive-and-debug-cluster): `donphan`. 
-
-{%- if site == 'Gent' %}
-
-    To get a list of all clusters and their hardware, see <https://www.ugent.be/hpc/en/infrastructure>.
-
-{%- endif %}
-
-This job script can now be submitted to the cluster's job system for execution, using the qsub (**q**ueue **sub**mit) command:
-
-```shell
-$ qsub run.sh
-{{jobid}}
-```
-
-This command returns a job identifier (*{{jobid}}*) on the HPC cluster. This is a unique identifier for the job which can be used to monitor and manage your job.
-
-!!! Warning "Make sure you understand what the `module` command does"
- 
-    Note that the module commands only modify environment variables. For instance, running `module swap cluster/{{othercluster}}` will update your shell environment so that `qsub` submits a job to the `{{othercluster}}` cluster, 
-    but our active shell session is still running on the login node.
-    
-    It is important to understand that while `module` commands affect your session environment, they do ***not*** change where the commands your are running are being executed: they will still be run on the login node you are on.
-    
-    When you submit a job script however, the commands ***in*** the job script will be run on a workernode of the cluster the job was submitted to (like `{{othercluster}}`).
-
-For detailed information about `module` commands, read the [running batch jobs](running_batch_jobs.md) chapter.
-
-### Wait for job to be executed
-
-Your job is put into a queue before being executed, so it may take a while before it actually starts.
-(see [when will my job start?](running_batch_jobs.md#when-will-my-job-start) for scheduling policy).
-
-You can get an overview of the active jobs using the `qstat` command:
-<pre><code>$ qstat
-Job ID     Name             User            Time Use S Queue
----------- ---------------- --------------- -------- - -------
-{{jobid}}     run.sh           {{userid}}        0:00:00  <b style="color:orange">Q</b> {{othercluster}}
-</code></pre> 
-
-Eventually, after entering `qstat` again you should see that your job has started running:
-<pre><code>$ qstat
-Job ID     Name             User            Time Use S Queue
----------- ---------------- --------------- -------- - -------
-{{jobid}}     run.sh           {{userid}}        0:00:01  <b style="color:green">R</b> {{othercluster}}
-</code></pre> 
-
-If you don't see your job in the output of the `qstat` command anymore, your job has likely completed.
-
-Read [this section](running_batch_jobs.md#monitoring-and-managing-your-jobs) on how to interpret the output.
-
-### Inspect your results
-
-When your job finishes it generates 2 output files:
-
-- One for normal output messages (*stdout* output channel).
-- One for warning and error messages (*stderr* output channel).
-
-By default located in the directory where you issued `qsub`.
-
-{%- if site == 'Gent' %}
-
-!!! Info
-
-    For more information about the stdout and stderr output channels, see this [section](linux-tutorial/beyond_the_basics.md#inputoutput).
-
-{%- endif %}
-
-In our example when running <code>ls</code> in the current directory you should see 2 new files:
- 
-- **run.sh.o{{jobid}}**, containing *normal output messages* produced by job {{jobid}};
-- **run.sh.e{{jobid}}**, containing *errors and warnings* produced by job {{jobid}}.
-
-!!! Info
-    
-    run.sh.e{{jobid}} should be empty (no errors or warnings).
-
-!!! Warning "Use your own job ID"
-
-    Replace <b>{{jobid}}</b> with the jobid you got from the `qstat` command (see above) or simply look for added files in your current directory by running `ls`.
-
-When examining the contents of ``run.sh.o{{jobid}}`` you will see something like this:
-```
-Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
-11493376/11490434 [==============================] - 1s 0us/step
-Epoch 1/5
-1875/1875 [==============================] - 2s 823us/step - loss: 0.2960 - accuracy: 0.9133
-Epoch 2/5
-1875/1875 [==============================] - 1s 771us/step - loss: 0.1427 - accuracy: 0.9571
-Epoch 3/5
-1875/1875 [==============================] - 1s 767us/step - loss: 0.1070 - accuracy: 0.9675
-Epoch 4/5
-1875/1875 [==============================] - 1s 764us/step - loss: 0.0881 - accuracy: 0.9727
-Epoch 5/5
-1875/1875 [==============================] - 1s 764us/step - loss: 0.0741 - accuracy: 0.9768
-313/313 - 0s - loss: 0.0782 - accuracy: 0.9764
-```
-
-Hurray 🎉, we trained a deep learning model and achieved 97,64 percent accuracy.
-
-!!! Warning
-
-    When using TensorFlow specifically, you should actually submit jobs to a GPU cluster for better performance, see [GPU clusters](gpu.md).
-
-    For the purpose of this example, we are running a very small TensorFlow workload on a CPU-only cluster.
-
-### Next steps
-
-- [Running interactive jobs](running_interactive_jobs.md)
-- [Running jobs with input/output data](running_jobs_with_input_output_data.md)
-- [Multi core jobs/Parallel Computing](multi_core_jobs.md)
-- [Interactive and debug cluster](interactive_debug.md#interactive-and-debug-cluster)
-
-For more examples see [Program examples](program_examples.md) and [Job script examples](jobscript_examples.md)
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt
deleted file mode 100644
index f95191b96f0..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt	
+++ /dev/null
@@ -1,25 +0,0 @@
-
-To get access to the HPC-UGent infrastructure, visit Getting an HPC Account[1].
-
-If you have not used Linux before, 
-now would be a good time to follow our Linux Tutorial[2].
-
-#### A typical workflow looks like this:
-
-1.  Connect to the login nodes 
-2.  Transfer your files to the HPC-UGent infrastructure
-3.  Optional: compile your code and test it 
-4.  Create a job script and submit your job
-5.  Wait for job to be executed
-6.  Study the results generated by your jobs, either on the cluster or
-    after downloading them locally.
-
-We will walk through an illustrative workload to get you started. In this example, our objective is to train a deep learning model for recognizing hand-written digits (MNIST dataset) using TensorFlow[3];
-see the example scripts[4].
-
-[1]: account.md
-[2]: linux-tutorial/index.md
-[3]: https://www.tensorflow.org/
-[4]: https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist
-
-reference: docs.hpc.ugent.be/getting_started_copy/#getting-access
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt
deleted file mode 100644
index 94f17ac5070..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt	
+++ /dev/null
@@ -1,19 +0,0 @@
-
-There are two options to connect
-
-- Using a terminal to connect via SSH (for power users) (see First Time connection to the HPC-UGent infrastructure[1])
-- Using the web portal[2]
-
-Considering your operating system is **{{OS}}**, 
-
-
-!!! Info
-
-    When having problems see the connection issues section on the troubleshooting page[3].
-
-
-[1]: connecting.md#first-time-connection-to-the-hpc-infrastructure
-[2]: web_portal.md
-[3]: troubleshooting.md#sec:connecting-issues
-
-reference: docs.hpc.ugent.be/getting_started_copy/#getting-connected
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt
deleted file mode 100644
index 3403b57f2c2..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt	
+++ /dev/null
@@ -1,11 +0,0 @@
-
-Welcome to the "Getting Started" guide. This chapter will lead you through the initial steps of logging into the HPC-UGent infrastructure and submitting your very first job. We'll also walk you through the process step by step using a practical example.
-
-In addition to this chapter, you might find the recording of the *Introduction to HPC-UGent* training session[1] to be a useful resource.
-
-Before proceeding, read the introduction to HPC[2] to gain an understanding of the HPC-UGent infrastructure and related terminology.
-
-[1]: https://www.ugent.be/hpc/en/training/introhpcugent-recording
-[2]: introduction.md
-
-reference: docs.hpc.ugent.be/getting_started_copy/#getting-started
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt
deleted file mode 100644
index 417416007f5..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt	
+++ /dev/null
@@ -1,56 +0,0 @@
-
-When your job finishes it generates 2 output files:
-
-- One for normal output messages (*stdout* output channel).
-- One for warning and error messages (*stderr* output channel).
-
-By default located in the directory where you issued `qsub`.
-
-
-!!! Info
-
-    For more information about the stdout and stderr output channels, see this section[1].
-
-
-In our example when running <code>ls</code> in the current directory you should see 2 new files:
- 
-- **run.sh.o{{jobid}}**, containing *normal output messages* produced by job {{jobid}};
-- **run.sh.e{{jobid}}**, containing *errors and warnings* produced by job {{jobid}}.
-
-!!! Info
-    
-    run.sh.e{{jobid}} should be empty (no errors or warnings).
-
-!!! Warning "Use your own job ID"
-
-    Replace <b>{{jobid}}</b> with the jobid you got from the `qstat` command (see above) or simply look for added files in your current directory by running `ls`.
-
-When examining the contents of ``run.sh.o{{jobid}}`` you will see something like this:
-```
-Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
-11493376/11490434 [==============================] - 1s 0us/step
-Epoch 1/5
-1875/1875 [==============================] - 2s 823us/step - loss: 0.2960 - accuracy: 0.9133
-Epoch 2/5
-1875/1875 [==============================] - 1s 771us/step - loss: 0.1427 - accuracy: 0.9571
-Epoch 3/5
-1875/1875 [==============================] - 1s 767us/step - loss: 0.1070 - accuracy: 0.9675
-Epoch 4/5
-1875/1875 [==============================] - 1s 764us/step - loss: 0.0881 - accuracy: 0.9727
-Epoch 5/5
-1875/1875 [==============================] - 1s 764us/step - loss: 0.0741 - accuracy: 0.9768
-313/313 - 0s - loss: 0.0782 - accuracy: 0.9764
-```
-
-Hurray 🎉, we trained a deep learning model and achieved 97,64 percent accuracy.
-
-!!! Warning
-
-    When using TensorFlow specifically, you should actually submit jobs to a GPU cluster for better performance, see GPU clusters[2].
-
-    For the purpose of this example, we are running a very small TensorFlow workload on a CPU-only cluster.
-
-[1]: linux-tutorial/beyond_the_basics.md#inputoutput
-[2]: gpu.md
-
-reference: docs.hpc.ugent.be/getting_started_copy/#inspect-your-results
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt
deleted file mode 100644
index 804b56b8251..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt	
+++ /dev/null
@@ -1,15 +0,0 @@
-
-- Running interactive jobs[1]
-- Running jobs with input/output data[2]
-- Multi core jobs/Parallel Computing[3]
-- Interactive and debug cluster[4]
-
-For more examples see Program examples[5] and Job script examples[6]
-[1]: running_interactive_jobs.md
-[2]: running_jobs_with_input_output_data.md
-[3]: multi_core_jobs.md
-[4]: interactive_debug.md#interactive-and-debug-cluster
-[5]: program_examples.md
-[6]: jobscript_examples.md
-
-reference: docs.hpc.ugent.be/getting_started_copy/#next-steps
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt
deleted file mode 100644
index edb336fa06b..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt	
+++ /dev/null
@@ -1,60 +0,0 @@
-
-Jobs are submitted and executed using job scripts. In our case **run.sh** can be used as a (very minimal) job script.
-
-A job script is a shell script, a text file that specifies the resources, 
-the software that is used (via `module load` statements), 
-and the steps that should be executed to run the calculation.
-
-Our job script looks like this:
-
-<center>-- run.sh --</center>
-
-```bash
-#!/bin/bash
-
-module load TensorFlow/2.11.0-foss-2022a
-
-python tensorflow_mnist.py
-
-```
-<sub>As you can see this job script will run the Python script named **tensorflow_mnist.py**.</sub>
-
-
-The jobs you submit are per default executed on **cluser/{{defaultcluster}}**, you can swap to another cluster by issuing the following command.
-
-```shell
-module swap cluster/{{othercluster}}
-```
-
-!!! Tip
-    
-    When submitting jobs with limited amount of resources, it is recommended to use the debug/interactive cluster[1]: `donphan`. 
-
-
-    To get a list of all clusters and their hardware, see <https://www.ugent.be/hpc/en/infrastructure>.
-
-
-This job script can now be submitted to the cluster's job system for execution, using the qsub (**q**ueue **sub**mit) command:
-
-```shell
-$ qsub run.sh
-{{jobid}}
-```
-
-This command returns a job identifier (*{{jobid}}*) on the HPC cluster. This is a unique identifier for the job which can be used to monitor and manage your job.
-
-!!! Warning "Make sure you understand what the `module` command does"
- 
-    Note that the module commands only modify environment variables. For instance, running `module swap cluster/{{othercluster}}` will update your shell environment so that `qsub` submits a job to the `{{othercluster}}` cluster, 
-    but our active shell session is still running on the login node.
-    
-    It is important to understand that while `module` commands affect your session environment, they do ***not*** change where the commands your are running are being executed: they will still be run on the login node you are on.
-    
-    When you submit a job script however, the commands ***in*** the job script will be run on a workernode of the cluster the job was submitted to (like `{{othercluster}}`).
-
-For detailed information about `module` commands, read the running batch jobs[2] chapter.
-
-[1]: interactive_debug.md#interactive-and-debug-cluster
-[2]: running_batch_jobs.md
-
-reference: docs.hpc.ugent.be/getting_started_copy/#submitting-a-job
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt
deleted file mode 100644
index 94dc30f6712..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt	
+++ /dev/null
@@ -1,21 +0,0 @@
-
-Now that you can login, it is time to transfer files from your local computer to your **home directory** on the HPC-UGent infrastructure.
-
-Download tensorflow_mnist.py[1] 
-and run.sh[2] example scripts to your computer (from here[3]).
-
-
-When running `ls` in your session on the HPC-UGent infrastructure, you should see the two files listed in your home directory (`~`):
-
-```shell
-$ ls ~
-run.sh tensorflow_mnist.py
-```
-
-When you do not see these files, make sure you uploaded the files to your **home directory**.
-
-[1]: https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py
-[2]: https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh
-[3]: https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist
-
-reference: docs.hpc.ugent.be/getting_started_copy/#transfer-your-files
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt
deleted file mode 100644
index de177946cf9..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt	
+++ /dev/null
@@ -1,26 +0,0 @@
-
-Your job is put into a queue before being executed, so it may take a while before it actually starts.
-(see when will my job start?[1] for scheduling policy).
-
-You can get an overview of the active jobs using the `qstat` command:
-<pre><code>$ qstat
-Job ID     Name             User            Time Use S Queue
----------- ---------------- --------------- -------- - -------
-{{jobid}}     run.sh           {{userid}}        0:00:00  <b style="color:orange">Q</b> {{othercluster}}
-</code></pre> 
-
-Eventually, after entering `qstat` again you should see that your job has started running:
-<pre><code>$ qstat
-Job ID     Name             User            Time Use S Queue
----------- ---------------- --------------- -------- - -------
-{{jobid}}     run.sh           {{userid}}        0:00:01  <b style="color:green">R</b> {{othercluster}}
-</code></pre> 
-
-If you don't see your job in the output of the `qstat` command anymore, your job has likely completed.
-
-Read this section[2] on how to interpret the output.
-
-[1]: running_batch_jobs.md#when-will-my-job-start
-[2]: running_batch_jobs.md#monitoring-and-managing-your-jobs
-
-reference: docs.hpc.ugent.be/getting_started_copy/#wait-for-job-to-be-executed
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt
deleted file mode 100644
index e756b9a3cbe..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-access
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt
deleted file mode 100644
index bac5dfcbfbe..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt	
+++ /dev/null
@@ -1,18 +0,0 @@
-it is recommended to make use of the `ssh` command in a terminal to get the most flexibility. 
-
-Assuming you have already generated SSH keys in the previous step (Getting Access[1]), and that they are in a default location, you should now be able to login by running the following command:
-
-<pre><code>ssh {{userid}}@{{loginnode}}</code></pre>
-
-!!! Warning "User your own VSC account id"
-    
-    Replace <b>{{userid}}</b> with your VSC account id (see <https://account.vscentrum.be>)
-
-!!! Tip
-
-    You can also still use the web portal (see shell access on web portal[2])
-
-[1]: #getting-access
-[2]: web_portal.md#shell-access
-
-reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-connected
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt
deleted file mode 100644
index f0b9d83bed3..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-started
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt
deleted file mode 100644
index 441b54c7042..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/Linux/getting_started_copy/#inspect-your-results
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt
deleted file mode 100644
index d72ffccf01a..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/Linux/getting_started_copy/#next-steps
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt
deleted file mode 100644
index 744c2c3db7a..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/Linux/getting_started_copy/#submitting-a-job
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt
deleted file mode 100644
index aca6e05d28c..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt	
+++ /dev/null
@@ -1,21 +0,0 @@
-
-On your local machine you can run:
-<pre><code>curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py
-curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh
-</code></pre>
-
-Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC).
-<pre><code>scp tensorflow_mnist.py run.sh {{userid}}{{ loginnode }}:~ </code></pre>
-<pre><code>ssh  {{userid}}@{{ loginnode }} </code></pre>
-
-!!! Warning "User your own VSC account id"
-    
-    Replace <b>{{userid}}</b> with your VSC account id (see <https://account.vscentrum.be>)
-
-!!! Info
-
-    For more information about transfering files or `scp`, see tranfer files from/to hpc[1].
-
-[1]: connecting.md#transfer-files-tofrom-the-hpc
-
-reference: docs.hpc.ugent.be/Linux/getting_started_copy/#transfer-your-files
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt
deleted file mode 100644
index 93e6fdff171..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/Linux/getting_started_copy/#wait-for-job-to-be-executed
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt
deleted file mode 100644
index 8732e586981..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-access
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt
deleted file mode 100644
index 2b1de2be838..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt	
+++ /dev/null
@@ -1,13 +0,0 @@
-it should be easy to make use of the `ssh` command in a terminal, but the web portal will work too. 
-The web portal[1] offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required).
-
-See shell access[2] when using the web portal, or
-connection to the HPC-UGent infrastructure[3] when using a terminal.
-
-Make sure you can get to a shell access to the HPC-UGent infrastructure before proceeding with the next steps.
-
-[1]: web_portal.md
-[2]: web_portal.md#shell-access
-[3]: connecting.md#first-time-connection-to-the-hpc-infrastructure
-
-reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-connected
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt
deleted file mode 100644
index 4e60f862a0a..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-started
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt
deleted file mode 100644
index f7ae9f96226..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/macOS/getting_started_copy/#inspect-your-results
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt
deleted file mode 100644
index 71f384bcf17..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/macOS/getting_started_copy/#next-steps
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt
deleted file mode 100644
index d72ba48195a..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/macOS/getting_started_copy/#submitting-a-job
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt
deleted file mode 100644
index fce05042ab2..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt	
+++ /dev/null
@@ -1,21 +0,0 @@
-
-On your local machine you can run:
-<pre><code>curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py
-curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh
-</code></pre>
-
-Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC).
-<pre><code>scp tensorflow_mnist.py run.sh {{userid}}{{ loginnode }}:~ </code></pre>
-<pre><code>ssh  {{userid}}@{{ loginnode }} </code></pre>
-
-!!! Warning "User your own VSC account id"
-    
-    Replace <b>{{userid}}</b> with your VSC account id (see <https://account.vscentrum.be>)
-
-!!! Info
-
-    For more information about transfering files or `scp`, see tranfer files from/to hpc[1].
-
-[1]: connecting.md#transfer-files-tofrom-the-hpc
-
-reference: docs.hpc.ugent.be/macOS/getting_started_copy/#transfer-your-files
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt
deleted file mode 100644
index 2ef8770504b..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/macOS/getting_started_copy/#wait-for-job-to-be-executed
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt
deleted file mode 100644
index 874af365704..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-access
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt
deleted file mode 100644
index ce0b873b2b0..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt	
+++ /dev/null
@@ -1,13 +0,0 @@
-it is recommended to use the web portal.
-The web portal[1] offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required).
-
-See shell access[2] when using the web portal, or
-connection to the HPC-UGent infrastructure[3] when using a terminal.
-
-Make sure you can get to a shell access to the HPC-UGent infrastructure before proceeding with the next steps.
-
-[1]: web_portal.md
-[2]: web_portal.md#shell-access
-[3]: connecting.md#first-time-connection-to-the-hpc-infrastructure
-
-reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-connected
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt
deleted file mode 100644
index 44d1f17b73b..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-started
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt
deleted file mode 100644
index 730fbbc3b74..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/Windows/getting_started_copy/#inspect-your-results
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt
deleted file mode 100644
index 55df915125a..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/Windows/getting_started_copy/#next-steps
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt
deleted file mode 100644
index f67d48ece4a..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/Windows/getting_started_copy/#submitting-a-job
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt
deleted file mode 100644
index dce86fc7cf3..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt	
+++ /dev/null
@@ -1,15 +0,0 @@
-
-The HPC-UGent web portal[1] provides a file browser that allows uploading files.
-For more information see the file browser section[2].
-
-Upload both files (`run.sh` and `tensorflow-mnist.py`) to your **home directory** and go back to your shell.
-
-!!! Info
-
-    As an alternative, you can use WinSCP (see our section[3])
-
-[1]: https://login.hpc.ugent.be
-[2]: web_portal.md#file-browser
-[3]: connecting.md#winscp
-
-reference: docs.hpc.ugent.be/Windows/getting_started_copy/#transfer-your-files
diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt
deleted file mode 100644
index bdd7387e379..00000000000
--- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt	
+++ /dev/null
@@ -1,2 +0,0 @@
-
-reference: docs.hpc.ugent.be/Windows/getting_started_copy/#wait-for-job-to-be-executed
diff --git a/scripts/HPC chatbot preprocessor/if_mangler.py b/scripts/HPC chatbot preprocessor/if_mangler.py
deleted file mode 100644
index 46b121610c9..00000000000
--- a/scripts/HPC chatbot preprocessor/if_mangler.py	
+++ /dev/null
@@ -1,72 +0,0 @@
-import re
-
-# global variable to keep track of latest if-statement scope
-is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if}
-
-
-def mangle_os_ifs(line):
-    global is_os
-
-    match = re.search(r'\{%(.*?)%}(.*)', line)
-
-    start_index = 0
-    added_length = 0
-
-    while match:
-
-        constr_match = re.search(r'\{%.*?%}', match.string)
-        if_match = re.search(r'if ', match.group(1))
-        if_os_match = re.search(r'if OS == ', match.group(1))
-        endif_match = re.search(r'endif', match.group(1))
-
-        if endif_match:
-            if is_os == 2:
-                line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
-                                                                                               constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
-                                                                                                                                                                                                                     constr_match.end() + start_index + added_length - 1:]
-                added_length += 8
-                is_os = 0
-            if is_os == 3:
-                line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
-                                                                                               constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
-                                                                                                                                                                                                                     constr_match.end() + start_index + added_length - 1:]
-                added_length += 8
-                is_os = 2
-            elif is_os == 1:
-                is_os = 2
-        elif if_match:
-            if if_os_match:
-                if is_os == 2:
-                    line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
-                                                                                                   constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
-                                                                                                                                                                                                                         constr_match.end() + start_index + added_length - 1:]
-                    added_length += 8
-                    is_os = 3
-                else:
-                    line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
-                                                                                                   constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
-                                                                                                                                                                                                                         constr_match.end() + start_index + added_length - 1:]
-                    added_length += 8
-                    is_os = 2
-            else:
-                if is_os == 2:
-                    is_os = 1
-                else:
-                    is_os = 0
-        else:
-            if is_os == 2 or is_os == 3:
-                line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
-                                                                                               constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
-                                                                                                                                                                                                                     constr_match.end() + start_index + added_length - 1:]
-                added_length += 8
-        start_index += constr_match.end()
-        match = re.search(r'\{%(.*?)%}(.*)', match.group(2))
-    return line
-
-
-def mangle_ifs(directory, file):
-    with open(".\\if_mangled_files\\" + file, 'w') as write_file:
-        with open(directory, 'r') as read_file:
-            for line in read_file:
-                new_line = mangle_os_ifs(line)
-                write_file.write(new_line)
diff --git a/scripts/HPC chatbot preprocessor/jinja_parser.py b/scripts/HPC chatbot preprocessor/jinja_parser.py
deleted file mode 100644
index 603a453ecf7..00000000000
--- a/scripts/HPC chatbot preprocessor/jinja_parser.py	
+++ /dev/null
@@ -1,31 +0,0 @@
-import yaml
-from jinja2 import Template, FileSystemLoader, Environment, ChoiceLoader
-from if_mangler import mangle_ifs
-
-
-# function that let's jinja do its thing to format the files expect for the os-related if-statements
-def jinja_parser(filename, copy_location):
-    # Read the YAML file
-    with open('..\\..\\mkdocs\\extra\\gent.yml', 'r') as yml_file:
-        words_dict = yaml.safe_load(yml_file)
-
-    # ugly fix for index.md error
-    additional_context = {
-        'config': {
-            'repo_url': 'https://github.com/hpcugent/vsc_user_docs'
-        }
-    }
-    combined_context = {**words_dict, **additional_context}
-
-    # Mangle the OS-related if-statements
-    mangle_ifs(copy_location, filename)
-
-    # Use Jinja2 to replace the macros
-    template_loader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")])
-    templateEnv = Environment(loader=template_loader)
-    template = templateEnv.get_template(filename)
-    rendered_content = template.render(combined_context)
-
-    # Save the rendered content to a new file
-    with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file:
-        output_file.write(rendered_content)

From b595301e5bd4b0c19a1beea04affeedb31e00a8c Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 13 Aug 2024 13:12:37 +0200
Subject: [PATCH 018/152] Rename directory

---
 .../chatbot_parser.py                                             | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename scripts/{HPC chatbot preprocessor => HPC_chatbot_preprocessor}/chatbot_parser.py (100%)

diff --git a/scripts/HPC chatbot preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
similarity index 100%
rename from scripts/HPC chatbot preprocessor/chatbot_parser.py
rename to scripts/HPC_chatbot_preprocessor/chatbot_parser.py

From 90c8ab760b7ff96d1536d5d85e29a36ea8bf90b5 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 13 Aug 2024 13:26:20 +0200
Subject: [PATCH 019/152] add a main function

---
 .../chatbot_parser.py                         | 256 +++++++++---------
 1 file changed, 130 insertions(+), 126 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index b6e42e99ea0..79951a5d0da 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -30,7 +30,7 @@
 problem_files = ["getting_started.md", "navigating.md"]
 
 # global variable to keep track of latest if-statement scope
-is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if}
+is_os = 0  # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if}
 
 
 ################### define functions ###################
@@ -356,138 +356,142 @@ def make_valid_title(s):
 
     return valid_filename
 
+def main():
+    global main_title, active_OS_if_states, last_directory, root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, is_linux_tutorial, in_code_block, last_title, curr_dirs, links_generic, links_linux, links_windows, links_macos
+    # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason
+    remove_directory_tree(".\\parsed_mds")
+    remove_directory_tree(".\\copies")
+    remove_directory_tree(".\\if_mangled_files")
 
-# remove the directories from a previous run of the parser
-remove_directory_tree(".\\parsed_mds")
-remove_directory_tree(".\\copies")
-remove_directory_tree(".\\if_mangled_files")
+    # make the necessary directories
+    if not os.path.exists(".\\copies"):
+        os.mkdir(".\\copies")
 
-# make the necessary directories
-if not os.path.exists(".\\copies"):
-    os.mkdir(".\\copies")
+    if not os.path.exists(".\\copies\\linux"):
+        os.mkdir(".\\copies\\linux")
 
-if not os.path.exists(".\\copies\\linux"):
-    os.mkdir(".\\copies\\linux")
+    if not os.path.exists(".\\parsed_mds"):
+        os.mkdir(".\\parsed_mds")
 
-if not os.path.exists(".\\parsed_mds"):
-    os.mkdir(".\\parsed_mds")
+    if not os.path.exists(".\\if_mangled_files"):
+        os.mkdir(".\\if_mangled_files")
 
-if not os.path.exists(".\\if_mangled_files"):
-    os.mkdir(".\\if_mangled_files")
+    for filenames in [filenames_generic, filenames_linux]:
+        for filename in filenames.keys():
+            ################### define/reset loop specific variables ###################
 
-for filenames in [filenames_generic, filenames_linux]:
-    for filename in filenames.keys():
-        ################### define/reset loop specific variables ###################
+            # variable that keeps track of whether file is part of the linux tutorial
+            is_linux_tutorial = bool("linux-tutorial" in filenames[filename])
 
-        # variable that keeps track of whether file is part of the linux tutorial
-        is_linux_tutorial = bool("linux-tutorial" in filenames[filename])
-
-        # make a copy of the original file in order to make sure the original does not get altered
-        if is_linux_tutorial:
-            copy_file = ".\\copies\\linux\\" + filename
-        else:
-            copy_file = ".\\copies\\" + filename
-        shutil.copyfile(filenames[filename], copy_file)
-
-        # variable that keeps track of the directories that are used to write in at different levels
-        if is_linux_tutorial:
-            root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\"
-            root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\"
-            root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\"
-            root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\"
-        else:
-            root_dir_generic = ".\\parsed_mds\\generic\\"
-            root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\"
-            root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\"
-            root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\"
-
-        # variable for the main title (needed for reference links)
-        main_title = filename[:-3]
-
-        # variable that keeps track of the directories that are used to write in at different levels
-        curr_dirs = [filename[:-3] for i in range(5)]
-
-        # variable to keep track whether we're dealing with OS-specific info or not
-        OS_specific = False
-
-        # variable that keeps track of the latest non-zero level title and corresponding directory
-        last_title_level = 1
-        last_title = None
-        last_directory = None
-        last_was_title = False
-
-        # list to keep track of links in the text
-        links_generic = []
-        links_linux = []
-        links_windows = []
-        links_macos = []
-
-        # dictionaries to keep track of current OS
-        active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"}
-
-        # variable that shows whether the first title has been reached yet
-        after_first_title = False
-
-        # variable that is used to be sure that we are detecting titles and not comments from codeblocks
-        in_code_block = False
-
-        ################### actually parse the md file ###################
-
-        # create directories for the source markdown file
-        create_directory(root_dir_generic)
-        create_directory(".\\parsed_mds\\os_specific")
-        create_directory(root_dir_os_specific_linux)
-        create_directory(root_dir_os_specific_windows)
-        create_directory(root_dir_os_specific_macos)
-        create_directory(root_dir_generic + curr_dirs[0])
-        create_directory(root_dir_os_specific_linux + curr_dirs[0])
-        create_directory(root_dir_os_specific_windows + curr_dirs[0])
-        create_directory(root_dir_os_specific_macos + curr_dirs[0])
-
-        # process the jinja macros
-        jinja_parser(filename, copy_file)
-
-        # convert the files without proper markdown layout into markdown using pandoc
-        if "linux-tutorial" in filenames[filename] and filename in problem_files:
-            pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file)
-
-        # open the file and store line by line in the right file
-        with open(copy_file, 'r') as readfile:
-
-            for line in readfile:
-                title_level, title, directory = check_for_title(line)
-
-                detect_in_code_block(line)
-
-                # line is a title with a maximum depth of 4
-                if title_level > 0:
-                    last_title_level = title_level
-                    last_title = title
-                    last_directory = directory
-                    after_first_title = True
-
-                # line is not a title
-                elif after_first_title:
-                    # check for if-statements and write the appropriate lines in the right files
-                    next_action = check_if_statements(line)
-                    while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message":
-                        if next_action[0] == "write_text_and_check_extra_message":
+            # make a copy of the original file in order to make sure the original does not get altered
+            if is_linux_tutorial:
+                copy_file = ".\\copies\\linux\\" + filename
+            else:
+                copy_file = ".\\copies\\" + filename
+            shutil.copyfile(filenames[filename], copy_file)
+
+            # variable that keeps track of the directories that are used to write in at different levels
+            if is_linux_tutorial:
+                root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\"
+                root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\"
+                root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\"
+                root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\"
+            else:
+                root_dir_generic = ".\\parsed_mds\\generic\\"
+                root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\"
+                root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\"
+                root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\"
+
+            # variable for the main title (needed for reference links)
+            main_title = filename[:-3]
+
+            # variable that keeps track of the directories that are used to write in at different levels
+            curr_dirs = [filename[:-3] for i in range(5)]
+
+            # variable to keep track whether we're dealing with OS-specific info or not
+            OS_specific = False
+
+            # variable that keeps track of the latest non-zero level title and corresponding directory
+            last_title_level = 1
+            last_title = None
+            last_directory = None
+            last_was_title = False
+
+            # list to keep track of links in the text
+            links_generic = []
+            links_linux = []
+            links_windows = []
+            links_macos = []
+
+            # dictionaries to keep track of current OS
+            active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"}
+
+            # variable that shows whether the first title has been reached yet
+            after_first_title = False
+
+            # variable that is used to be sure that we are detecting titles and not comments from codeblocks
+            in_code_block = False
+
+            ################### actually parse the md file ###################
+
+            # create directories for the source markdown file
+            create_directory(root_dir_generic)
+            create_directory(".\\parsed_mds\\os_specific")
+            create_directory(root_dir_os_specific_linux)
+            create_directory(root_dir_os_specific_windows)
+            create_directory(root_dir_os_specific_macos)
+            create_directory(root_dir_generic + curr_dirs[0])
+            create_directory(root_dir_os_specific_linux + curr_dirs[0])
+            create_directory(root_dir_os_specific_windows + curr_dirs[0])
+            create_directory(root_dir_os_specific_macos + curr_dirs[0])
+
+            # process the jinja macros
+            jinja_parser(filename, copy_file)
+
+            # convert the files without proper markdown layout into markdown using pandoc
+            if "linux-tutorial" in filenames[filename] and filename in problem_files:
+                pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file)
+
+            # open the file and store line by line in the right file
+            with open(copy_file, 'r') as readfile:
+
+                for line in readfile:
+                    title_level, title, directory = check_for_title(line)
+
+                    detect_in_code_block(line)
+
+                    # line is a title with a maximum depth of 4
+                    if title_level > 0:
+                        last_title_level = title_level
+                        last_title = title
+                        last_directory = directory
+                        after_first_title = True
+
+                    # line is not a title
+                    elif after_first_title:
+                        # check for if-statements and write the appropriate lines in the right files
+                        next_action = check_if_statements(line)
+                        while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message":
+                            if next_action[0] == "write_text_and_check_extra_message":
+                                choose_and_write_to_file(next_action[2])
+                            next_action = check_if_statements(next_action[1])
+
+                        if next_action[0] == "write_text":
                             choose_and_write_to_file(next_action[2])
-                        next_action = check_if_statements(next_action[1])
-
-                    if next_action[0] == "write_text":
-                        choose_and_write_to_file(next_action[2])
-
-        # write end of file for the last file
-        write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial)
-        write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux",
-                          links_linux, is_linux_tutorial)
-        write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows",
-                          links_windows, is_linux_tutorial)
-        write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS",
-                          links_macos, is_linux_tutorial)
-
-remove_directory_tree(".\\copies")
-remove_directory_tree(".\\if_mangled_files")
+
+            # write end of file for the last file
+            write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial)
+            write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux",
+                              links_linux, is_linux_tutorial)
+            write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows",
+                              links_windows, is_linux_tutorial)
+            write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS",
+                              links_macos, is_linux_tutorial)
+
+    remove_directory_tree(".\\copies")
+    remove_directory_tree(".\\if_mangled_files")
+
+
+main()
 # TODO: reconsider maximum depth to be detected as title (now at four)
 # TODO: adapt script to be used from command line

From b8ae7066d089202d8554ae5f00e98a9281d7c25d Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 13 Aug 2024 14:16:27 +0200
Subject: [PATCH 020/152] make file paths non os-specific

---
 .../chatbot_parser.py                         | 106 +++++++++---------
 1 file changed, 56 insertions(+), 50 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 79951a5d0da..f67d0f0d529 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -12,7 +12,7 @@
 ################### define global variables ###################
 
 # variable that keeps track of the source directories
-source_directories = ["..\\..\\mkdocs\\docs\\HPC\\", "..\\..\\mkdocs\\docs\\HPC\\linux-tutorial"]
+source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"), os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")]
 
 # list of all the filenames
 filenames_generic = {}
@@ -67,22 +67,21 @@ def check_for_title(curr_line):
         return 0, None, None
     else:
         if last_title is not None:
-            write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial)
-            write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux",
+            write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, is_linux_tutorial)
+            write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux",
                               links_linux, is_linux_tutorial)
-            write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows",
+            write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), "Windows",
                               links_windows, is_linux_tutorial)
-            write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS",
+            write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS",
                               links_macos, is_linux_tutorial)
             reset_link_lists()
 
-        curr_dirs[logic_output] = curr_dirs[logic_output - 1] + "\\" + make_valid_title(
-            curr_line[logic_output + 1:-1].replace(' ', '-'))
+        curr_dirs[logic_output] = os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')))
 
-        create_directory(root_dir_generic + curr_dirs[logic_output])
-        create_directory(root_dir_os_specific_linux + curr_dirs[logic_output])
-        create_directory(root_dir_os_specific_windows + curr_dirs[logic_output])
-        create_directory(root_dir_os_specific_macos + curr_dirs[logic_output])
+        create_directory(os.path.join(root_dir_generic,  curr_dirs[logic_output]))
+        create_directory(os.path.join(root_dir_os_specific_linux,  curr_dirs[logic_output]))
+        create_directory(os.path.join(root_dir_os_specific_windows,  curr_dirs[logic_output]))
+        create_directory(os.path.join(root_dir_os_specific_macos,  curr_dirs[logic_output]))
 
         update_lower_curr_dir(curr_dirs[logic_output], logic_output)
         return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output]
@@ -127,8 +126,12 @@ def replace_markdown_markers(curr_line, linklist):
 
 # function that let's jinja do its thing to format the files expect for the os-related if-statements
 def jinja_parser(filename, copy_location):
+
+    # YAML file location
+    yml_file_path = os.path.join('..', '..', 'mkdocs', 'extra', 'gent.yml')
+
     # Read the YAML file
-    with open('..\\..\\mkdocs\\extra\\gent.yml', 'r') as yml_file:
+    with open(yml_file_path, 'r') as yml_file:
         words_dict = yaml.safe_load(yml_file)
 
     # ugly fix for index.md error
@@ -143,7 +146,7 @@ def jinja_parser(filename, copy_location):
     mangle_ifs(copy_location, filename)
 
     # Use Jinja2 to replace the macros
-    template_loader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")])
+    template_loader = ChoiceLoader([FileSystemLoader(searchpath='if_mangled_files'), FileSystemLoader(searchpath=os.path.join("..", "..", "mkdocs", "docs", "HPC"))])
     templateEnv = Environment(loader=template_loader)
     template = templateEnv.get_template(filename)
     rendered_content = template.render(combined_context)
@@ -214,7 +217,7 @@ def mangle_os_ifs(line):
 
 
 def mangle_ifs(directory, file):
-    with open(".\\if_mangled_files\\" + file, 'w') as write_file:
+    with open(os.path.join("if_mangled_files",  file), 'w') as write_file:
         with open(directory, 'r') as read_file:
             for line in read_file:
                 new_line = mangle_os_ifs(line)
@@ -305,13 +308,13 @@ def choose_and_write_to_file(curr_line):
     # check that the line is part of the website for gent
     if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and \
             active_OS_if_states["macos"] == "inactive":
-        write_text_to_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", curr_line)
+        write_text_to_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), curr_line)
     if active_OS_if_states["linux"] == "active":
-        write_text_to_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", curr_line)
+        write_text_to_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), curr_line)
     if active_OS_if_states["windows"] == "active":
-        write_text_to_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", curr_line)
+        write_text_to_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), curr_line)
     if active_OS_if_states["macos"] == "active":
-        write_text_to_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", curr_line)
+        write_text_to_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), curr_line)
 
 
 # function that adds a reference link at the end of every txt file
@@ -356,25 +359,26 @@ def make_valid_title(s):
 
     return valid_filename
 
+
 def main():
     global main_title, active_OS_if_states, last_directory, root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, is_linux_tutorial, in_code_block, last_title, curr_dirs, links_generic, links_linux, links_windows, links_macos
     # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason
-    remove_directory_tree(".\\parsed_mds")
-    remove_directory_tree(".\\copies")
-    remove_directory_tree(".\\if_mangled_files")
+    remove_directory_tree("parsed_mds")
+    remove_directory_tree("copies")
+    remove_directory_tree("if_mangled_files")
 
     # make the necessary directories
-    if not os.path.exists(".\\copies"):
-        os.mkdir(".\\copies")
+    if not os.path.exists("copies"):
+        os.mkdir("copies")
 
-    if not os.path.exists(".\\copies\\linux"):
-        os.mkdir(".\\copies\\linux")
+    if not os.path.exists(os.path.join("copies", "linux")):
+        os.mkdir(os.path.join("copies", "linux"))
 
-    if not os.path.exists(".\\parsed_mds"):
-        os.mkdir(".\\parsed_mds")
+    if not os.path.exists("parsed_mds"):
+        os.mkdir("parsed_mds")
 
-    if not os.path.exists(".\\if_mangled_files"):
-        os.mkdir(".\\if_mangled_files")
+    if not os.path.exists("if_mangled_files"):
+        os.mkdir("if_mangled_files")
 
     for filenames in [filenames_generic, filenames_linux]:
         for filename in filenames.keys():
@@ -385,22 +389,22 @@ def main():
 
             # make a copy of the original file in order to make sure the original does not get altered
             if is_linux_tutorial:
-                copy_file = ".\\copies\\linux\\" + filename
+                copy_file = os.path.join("copies", "linux",  filename)
             else:
-                copy_file = ".\\copies\\" + filename
+                copy_file = os.path.join("copies", filename)
             shutil.copyfile(filenames[filename], copy_file)
 
             # variable that keeps track of the directories that are used to write in at different levels
             if is_linux_tutorial:
-                root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\"
-                root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\"
-                root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\"
-                root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\"
+                root_dir_generic = os.path.join("parsed_mds", "generic", "linux_tutorial")
+                root_dir_os_specific_linux = os.path.join("parsed_mds", "os_specific", "linux", "linux_tutorial")
+                root_dir_os_specific_windows = os.path.join("parsed_mds", "os_specific", "windows", "linux_tutorial")
+                root_dir_os_specific_macos = os.path.join("parsed_mds", "os_specific", "macos", "linux_tutorial")
             else:
-                root_dir_generic = ".\\parsed_mds\\generic\\"
-                root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\"
-                root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\"
-                root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\"
+                root_dir_generic = os.path.join("parsed_mds", "generic")
+                root_dir_os_specific_linux = os.path.join("parsed_mds", "os_specific", "linux")
+                root_dir_os_specific_windows = os.path.join("parsed_mds", "os_specific", "windows")
+                root_dir_os_specific_macos = os.path.join("parsed_mds", "os_specific", "macos")
 
             # variable for the main title (needed for reference links)
             main_title = filename[:-3]
@@ -436,14 +440,14 @@ def main():
 
             # create directories for the source markdown file
             create_directory(root_dir_generic)
-            create_directory(".\\parsed_mds\\os_specific")
+            create_directory(os.path.join("parsed_mds", "os_specific"))
             create_directory(root_dir_os_specific_linux)
             create_directory(root_dir_os_specific_windows)
             create_directory(root_dir_os_specific_macos)
-            create_directory(root_dir_generic + curr_dirs[0])
-            create_directory(root_dir_os_specific_linux + curr_dirs[0])
-            create_directory(root_dir_os_specific_windows + curr_dirs[0])
-            create_directory(root_dir_os_specific_macos + curr_dirs[0])
+            create_directory(os.path.join(root_dir_generic, curr_dirs[0]))
+            create_directory(os.path.join(root_dir_os_specific_linux, curr_dirs[0]))
+            create_directory(os.path.join(root_dir_os_specific_windows, curr_dirs[0]))
+            create_directory(os.path.join(root_dir_os_specific_macos, curr_dirs[0]))
 
             # process the jinja macros
             jinja_parser(filename, copy_file)
@@ -480,16 +484,18 @@ def main():
                             choose_and_write_to_file(next_action[2])
 
             # write end of file for the last file
-            write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial)
-            write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux",
+            write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic,
+                              is_linux_tutorial)
+            write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux",
                               links_linux, is_linux_tutorial)
-            write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows",
+            write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"),
+                              "Windows",
                               links_windows, is_linux_tutorial)
-            write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS",
+            write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS",
                               links_macos, is_linux_tutorial)
 
-    remove_directory_tree(".\\copies")
-    remove_directory_tree(".\\if_mangled_files")
+    remove_directory_tree("copies")
+    remove_directory_tree("if_mangled_files")
 
 
 main()

From b7514973facd2edeb274161b67ae4eee53140229 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 13 Aug 2024 14:54:41 +0200
Subject: [PATCH 021/152] use docstrings to document the functions

---
 .../chatbot_parser.py                         | 143 +++++++++++++++---
 1 file changed, 125 insertions(+), 18 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index f67d0f0d529..f5596fd5b1d 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -34,14 +34,24 @@
 
 
 ################### define functions ###################
-# function that removes the previous file structure before starting the process of making a new one
 def remove_directory_tree(old_directory):
+    """
+    function that removes a full directory tree
+
+    :param old_directory: the directory to be removed
+    :return:
+    """
     if os.path.exists(old_directory):
         shutil.rmtree(old_directory)
 
 
-# function that checks whether the current line has a title of level 4 at maximum (returns the level of the title or 0 if the line is not a title)
 def check_for_title_logic(curr_line):
+    """
+    function that checks whether the current line has a title of level 4 at maximum (returns the level of the title or 0 if the line is not a title)
+
+    :param curr_line: the line to be checked for a title
+    :return: depth of the title
+    """
     global curr_dirs
     match = re.match(r'^#+ ', curr_line)
     if match and len(match.group(0)) <= 5:
@@ -50,8 +60,12 @@ def check_for_title_logic(curr_line):
         return 0
 
 
-# function that resets the contents of the link_lists
 def reset_link_lists():
+    """
+    function that resets the contents of the link_lists
+
+    :return:
+    """
     global links_generic, links_linux, links_windows, links_macos
     links_generic = []
     links_linux = []
@@ -59,8 +73,15 @@ def reset_link_lists():
     links_macos = []
 
 
-# function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables
 def check_for_title(curr_line):
+    """
+    function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables
+
+    :param curr_line: the line to be checked for a title
+    :return: the depth of the title
+    :return: the title found in the line if any
+    :return: the new directory in which the next file will be written
+    """
     global curr_dirs, last_title, in_code_block
     logic_output = check_for_title_logic(curr_line)
     if logic_output == 0 or in_code_block:
@@ -87,28 +108,51 @@ def check_for_title(curr_line):
         return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output]
 
 
-# function used to detect codeblocks and make sure the comments don't get detected as titles
 def detect_in_code_block(curr_line):
+    """
+    function used to detect codeblocks and make sure the comments don't get detected as titles
+
+    :param curr_line: the line in which the start or end of a codeblock needs to be detected
+    :return:
+    """
     global in_code_block
     if '```' in curr_line or (('<pre><code>' in curr_line) ^ ('</code></pre>' in curr_line)):
         in_code_block = not in_code_block
 
 
-# function that creates directories if needed
 def create_directory(new_directory):
+    """
+    function that creates new directories
+
+    :param new_directory: directory to be created
+    :return:
+    """
     if not os.path.exists(new_directory):
         os.mkdir(new_directory)
 
 
-# function that updates the curr_dir variables when needed
 def update_lower_curr_dir(curr_directory, level):
+    """
+    function that updates the curr_dir variables when needed
+
+    :param curr_directory: the current directory to which the lower level current directories need to be updated
+    :param level: the depth of the current directory
+    :return:
+    """
     global curr_dirs
     for i in range(level + 1, 4):
         curr_dirs[i] = curr_directory
 
 
-# function that replaces certain markdown structures with the equivalent used on the website
 def replace_markdown_markers(curr_line, linklist):
+    """
+    function that replaces certain markdown structures with the equivalent used on the website
+
+    :param curr_line: the current line on which markdown structures need to be replaced
+    :param linklist: the list used to store links that need to be printed at the end of the file
+    :return curr_line: the adapted current line
+    :return linklist: the updated linklist
+    """
     # replace links with a reference
     matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line)
     if matches:
@@ -124,9 +168,14 @@ def replace_markdown_markers(curr_line, linklist):
     return curr_line, linklist
 
 
-# function that let's jinja do its thing to format the files expect for the os-related if-statements
 def jinja_parser(filename, copy_location):
+    """
+    function that lets jinja do its thing to format the files except for the os-related if-statements
 
+    :param filename: the name of the file that needs to be formatted using jinja
+    :param copy_location: the location of the file that needs to be formatted using jinja
+    :return:
+    """
     # YAML file location
     yml_file_path = os.path.join('..', '..', 'mkdocs', 'extra', 'gent.yml')
 
@@ -157,6 +206,12 @@ def jinja_parser(filename, copy_location):
 
 
 def mangle_os_ifs(line):
+    """
+    function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure.
+
+    :param line: the current line to check for os-related if-statements
+    :return line: the modified line with mangled os-related if-statements
+    """
     global is_os
 
     match = re.search(r'\{%(.*?)%}(.*)', line)
@@ -217,6 +272,13 @@ def mangle_os_ifs(line):
 
 
 def mangle_ifs(directory, file):
+    """
+    function that writes the if-mangled version of a file to a location where the jinja parser will use it
+
+    :param directory: the path of the file to be if-mangled
+    :param file: the filename of the file to be mangled
+    :return:
+    """
     with open(os.path.join("if_mangled_files",  file), 'w') as write_file:
         with open(directory, 'r') as read_file:
             for line in read_file:
@@ -224,8 +286,19 @@ def mangle_ifs(directory, file):
                 write_file.write(new_line)
 
 
-# function that checks for if-statements
 def check_if_statements(curr_line):
+    """
+    function that checks for if-statements
+
+    :param curr_line: the line to be checked for if-statements to build the directory structure
+    :return: the next action to be done with the line:
+                "done": An if-statement has been found at the start of the line, the active os list has been updated, processing of the current line is finished and a following line can be processed.
+                "check_extra_message": An if-statement has been found at the start of the line, the active os list has been updated, more text has been detected after the if-statement that also needs to be checked.
+                "write_text": No if-statement has been found, write the current line to a file (can also be part of the current line)
+                "write_text_and_check_extra_message": An if statement has been found not at the start of the line. Firstly, write the text up until the if-statement to a file, then check the rest of the line.
+    :return: the extra message to be checked, if any
+    :return: the text to be written to the file, if any
+    """
     # check whether the first part of the line contains information wrt if-statements
     match = re.search(r'^\{-if-%([^%]*)%-if-}(.*)', curr_line)
 
@@ -288,8 +361,14 @@ def check_if_statements(curr_line):
         return "write_text", None, curr_line
 
 
-# function that writes a line to a file
 def write_text_to_file(file_name, curr_line):
+    """
+    function that writes a line to a file
+
+    :param file_name: target file to write the line to
+    :param curr_line: line to be written to the file
+    :return:
+    """
     global links_generic, links_linux, links_windows, links_macos
     with open(file_name, "a") as write_file:
         if "generic" in file_name:
@@ -303,8 +382,13 @@ def write_text_to_file(file_name, curr_line):
         write_file.write(curr_line)
 
 
-# function that decides what file to write text to
 def choose_and_write_to_file(curr_line):
+    """
+    function that decides what file to write text to
+
+    :param curr_line: line to be written to a file
+    :return:
+    """
     # check that the line is part of the website for gent
     if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and \
             active_OS_if_states["macos"] == "inactive":
@@ -317,14 +401,28 @@ def choose_and_write_to_file(curr_line):
         write_text_to_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), curr_line)
 
 
-# function that adds a reference link at the end of every txt file
 def add_reference_link(file_location, reference_link):
+    """
+    function that adds a reference link at the end of every txt file
+
+    :param file_location: the file that needs a reference link
+    :param reference_link: the reference link that needs to be written
+    :return:
+    """
     with open(file_location, 'a') as write_file:
         write_file.write("\nreference: " + reference_link + "\n")
 
 
-# function that adds the links that should be at the end of a file
 def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_):
+    """
+    function that adds the links that should be at the end of a file
+
+    :param file_location: the location of the file
+    :param OS: the OS of the file
+    :param linklist: the links that should be at the end of the file
+    :param is_linux_tutorial_: boolean indicating whether the file is part of the linux tutorial
+    :return:
+    """
     if len(OS) > 0:
         OS = OS + "/"
 
@@ -343,16 +441,21 @@ def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_):
     add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-'))
 
 
-# function that makes sure all titles can be used as valid filenames
-def make_valid_title(s):
+def make_valid_title(title):
+    """
+    function that makes sure all titles can be used as valid filenames
+
+    :param title: the string that will be used as title and filename
+    :return valid_filename: the adapted title that can be used as filename
+    """
     # Define a regex pattern for invalid characters on both Windows and Linux
     invalid_chars = r'[<>:"/\\|?*\0()]'
 
     # get rid of extra information between {} brackets
-    s = re.sub(r'\{.*?}', '', s)
+    s = re.sub(r'\{.*?}', '', title)
 
     # Remove invalid characters
-    valid_filename = re.sub(invalid_chars, '', s)
+    valid_filename = re.sub(invalid_chars, '', s)
 
     # Strip leading/trailing whitespace
     valid_filename = valid_filename.strip().strip('-')
@@ -361,6 +464,10 @@ def make_valid_title(s):
 
 
 def main():
+    """
+    main function
+    :return:
+    """
     global main_title, active_OS_if_states, last_directory, root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, is_linux_tutorial, in_code_block, last_title, curr_dirs, links_generic, links_linux, links_windows, links_macos
     # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason
     remove_directory_tree("parsed_mds")

From 0f8eb5dfa6e7cc8adee238fd7467dbc9247e0012 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 13 Aug 2024 15:15:56 +0200
Subject: [PATCH 022/152] rewrite the if-mangler to make it more readable

---
 .../chatbot_parser.py                         | 35 +++++++------------
 1 file changed, 13 insertions(+), 22 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index f5596fd5b1d..4f1865b9411 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -225,35 +225,27 @@ def mangle_os_ifs(line):
         if_match = re.search(r'if ', match.group(1))
         if_os_match = re.search(r'if OS == ', match.group(1))
         endif_match = re.search(r'endif', match.group(1))
+        pos_first_mangle = constr_match.start() + start_index + added_length + 1
+        pos_second_mangle = constr_match.end() + start_index + added_length - 1
 
+        # this logic isn't flawless: a number of nested if-constructions are technically possible that would break it, but these don't appear in the documentation because they wouldn't make sense there
         if endif_match:
-            if is_os == 2:
-                line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
-                                                                                               constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
-                                                                                                                                                                                                                     constr_match.end() + start_index + added_length - 1:]
-                added_length += 8
-                is_os = 0
-            if is_os == 3:
-                line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
-                                                                                               constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
-                                                                                                                                                                                                                     constr_match.end() + start_index + added_length - 1:]
+            if is_os == 2 or is_os == 3:
+                line = line[:pos_first_mangle] + "-if-" + line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:]
                 added_length += 8
-                is_os = 2
+                if is_os == 2:
+                    is_os = 0
+                elif is_os == 3:
+                    is_os = 2
             elif is_os == 1:
                 is_os = 2
         elif if_match:
             if if_os_match:
+                line = line[:pos_first_mangle] + "-if-" + line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:]
+                added_length += 8
                 if is_os == 2:
-                    line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
-                                                                                                   constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
-                                                                                                                                                                                                                         constr_match.end() + start_index + added_length - 1:]
-                    added_length += 8
                     is_os = 3
                 else:
-                    line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
-                                                                                                   constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
-                                                                                                                                                                                                                         constr_match.end() + start_index + added_length - 1:]
-                    added_length += 8
                     is_os = 2
             else:
                 if is_os == 2:
@@ -262,10 +254,9 @@ def mangle_os_ifs(line):
                     is_os = 0
         else:
             if is_os == 2 or is_os == 3:
-                line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[
-                                                                                               constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[
-                                                                                                                                                                                                                     constr_match.end() + start_index + added_length - 1:]
+                line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[constr_match.end() + start_index + added_length - 1:]
                 added_length += 8
+
         start_index += constr_match.end()
         match = re.search(r'\{%(.*?)%}(.*)', match.group(2))
     return line

From 9938e921674d5e46a3917feef8a780f22b427440 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 13 Aug 2024 16:09:23 +0200
Subject: [PATCH 023/152] got rid of most global variables

---
 .../chatbot_parser.py                         | 162 +++++++-----------
 1 file changed, 63 insertions(+), 99 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 4f1865b9411..e7a88e4de7c 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -29,9 +29,6 @@
 # some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc
 problem_files = ["getting_started.md", "navigating.md"]
 
-# global variable to keep track of latest if-statement scope
-is_os = 0  # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if}
-
 
 ################### define functions ###################
 def remove_directory_tree(old_directory):
@@ -45,57 +42,48 @@ def remove_directory_tree(old_directory):
         shutil.rmtree(old_directory)
 
 
-def check_for_title_logic(curr_line):
-    """
-    function that checks whether the current line has a title of level 4 at maximum (returns the level of the title or 0 if the line is not a title)
-
-    :param curr_line: the line to be checked for a title
-    :return: depth of the title
-    """
-    global curr_dirs
-    match = re.match(r'^#+ ', curr_line)
-    if match and len(match.group(0)) <= 5:
-        return len(match.group(0)) - 1
-    else:
-        return 0
-
-
-def reset_link_lists():
-    """
-    function that resets the contents of the link_lists
-
-    :return:
-    """
-    global links_generic, links_linux, links_windows, links_macos
-    links_generic = []
-    links_linux = []
-    links_windows = []
-    links_macos = []
-
-
-def check_for_title(curr_line):
+def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs, is_linux_tutorial_, in_code_block_):
     """
     function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables
 
     :param curr_line: the line to be checked for a title
+    :param main_title: the main title of the file, needed in the case where a file is finished
+    :param last_directory: the most recently encountered directory
+    :param last_title: the most recently encountered title
+    :param curr_dirs: the most recent directories at each title level
+    :param is_linux_tutorial_: boolean to indicate whether the current file is part of the linux tutorial
+    :param in_code_block_: boolean to indicate whether the current line is part of a codeblock
     :return: the depth of the title
     :return: the title found in the line if any
     :return: the new directory in which the next file will be written
     """
-    global curr_dirs, last_title, in_code_block
-    logic_output = check_for_title_logic(curr_line)
-    if logic_output == 0 or in_code_block:
-        return 0, None, None
+    global links_generic, links_linux, links_windows, links_macos
+
+    # detect titles
+    match = re.match(r'^#+ ', curr_line)
+    if match and len(match.group(0)) <= 5:
+        logic_output = len(match.group(0)) - 1
+    else:
+        logic_output = 0
+
+    # make necessary changes if a title has been detected
+    if logic_output == 0 or in_code_block_:
+        return 0, None, None, curr_dirs
     else:
         if last_title is not None:
-            write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, is_linux_tutorial)
+            write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, is_linux_tutorial_, main_title, last_title)
             write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux",
-                              links_linux, is_linux_tutorial)
+                              links_linux, is_linux_tutorial_, main_title, last_title)
             write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), "Windows",
-                              links_windows, is_linux_tutorial)
+                              links_windows, is_linux_tutorial_, main_title, last_title)
             write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS",
-                              links_macos, is_linux_tutorial)
-            reset_link_lists()
+                              links_macos, is_linux_tutorial_, main_title, last_title)
+
+            # reset the link lists
+            links_generic = []
+            links_linux = []
+            links_windows = []
+            links_macos = []
 
         curr_dirs[logic_output] = os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')))
 
@@ -104,20 +92,11 @@ def check_for_title(curr_line):
         create_directory(os.path.join(root_dir_os_specific_windows,  curr_dirs[logic_output]))
         create_directory(os.path.join(root_dir_os_specific_macos,  curr_dirs[logic_output]))
 
-        update_lower_curr_dir(curr_dirs[logic_output], logic_output)
-        return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output]
-
-
-def detect_in_code_block(curr_line):
-    """
-    function used to detect codeblocks and make sure the comments don't get detected as titles
+        # update the lower order current directories
+        for i in range(logic_output + 1, 4):
+            curr_dirs[i] = curr_dirs[logic_output]
 
-    :param curr_line: the line in which the start or end of a codeblock needs to be detected
-    :return:
-    """
-    global in_code_block
-    if '```' in curr_line or (('<pre><code>' in curr_line) ^ ('</code></pre>' in curr_line)):
-        in_code_block = not in_code_block
+        return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs
 
 
 def create_directory(new_directory):
@@ -131,19 +110,6 @@ def create_directory(new_directory):
         os.mkdir(new_directory)
 
 
-def update_lower_curr_dir(curr_directory, level):
-    """
-    function that updates the curr_dir variables when needed
-
-    :param curr_directory: the current directory to which the lower level current directories need to be updated
-    :param level: the depth of the current directory
-    :return:
-    """
-    global curr_dirs
-    for i in range(level + 1, 4):
-        curr_dirs[i] = curr_directory
-
-
 def replace_markdown_markers(curr_line, linklist):
     """
     function that replaces certain markdown structures with the equivalent used on the website
@@ -205,14 +171,14 @@ def jinja_parser(filename, copy_location):
         output_file.write(rendered_content)
 
 
-def mangle_os_ifs(line):
+def mangle_os_ifs(line, is_os):
     """
     function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure.
 
     :param line: the current line to check for os-related if-statements
+    :param is_os: integer keeping track of the current os-state of the if-statements. Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if}
     :return line: the modified line with  mangled os-related if-statements
     """
-    global is_os
 
     match = re.search(r'\{%(.*?)%}(.*)', line)
 
@@ -259,7 +225,7 @@ def mangle_os_ifs(line):
 
         start_index += constr_match.end()
         match = re.search(r'\{%(.*?)%}(.*)', match.group(2))
-    return line
+    return line, is_os
 
 
 def mangle_ifs(directory, file):
@@ -270,18 +236,22 @@ def mangle_ifs(directory, file):
     :param file: the filename of the file to be mangled
     :return:
     """
+    # variable to keep track of latest if-statement scope
+    is_os = 0  # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if}
+
     with open(os.path.join("if_mangled_files",  file), 'w') as write_file:
         with open(directory, 'r') as read_file:
             for line in read_file:
-                new_line = mangle_os_ifs(line)
+                new_line, is_os = mangle_os_ifs(line, is_os)
                 write_file.write(new_line)
 
 
-def check_if_statements(curr_line):
+def check_if_statements(curr_line, active_OS_if_states):
     """
     function that checks for if-statements
 
     :param curr_line: the line to be checked for if-statements to build the directory structure
+    :param active_OS_if_states: dictionary keeping track of the active OS states according to the if-statements
     :return: the next action to be done with the line:
                 "done": An if-statement has been found at the start of the line, the active os list has been updated, processing of the current line is finished and a following line can be processed.
                 "check_extra_message": An if-statement has been found at the start of the line, the active os list has been updated, more text has been detected after the if-statement that also needs to be checked.
@@ -373,11 +343,14 @@ def write_text_to_file(file_name, curr_line):
         write_file.write(curr_line)
 
 
-def choose_and_write_to_file(curr_line):
+def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, last_title):
     """
     function that decides what file to write text to
 
     :param curr_line: line to be written to a file
+    :param active_OS_if_states: dictionary keeping track of which OSes are active according to the if-statements
+    :param last_directory: most recently made directory
+    :param last_title: the most recently encountered title
     :return:
     """
     # check that the line is part of the website for gent
@@ -392,19 +365,7 @@ def choose_and_write_to_file(curr_line):
         write_text_to_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), curr_line)
 
 
-def add_reference_link(file_location, reference_link):
-    """
-    function that adds a reference link at the end of every txt file
-
-    :param file_location: the file that needs a reference link
-    :param reference_link: the reference link that needs to be written
-    :return:
-    """
-    with open(file_location, 'a') as write_file:
-        write_file.write("\nreference: " + reference_link + "\n")
-
-
-def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_):
+def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_, main_title, last_title):
     """
     function that adds the links that should be at the end of a file
 
@@ -412,6 +373,8 @@ def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_):
     :param OS: the OS of the file
     :param linklist: the links that should be at the end of the file
     :param is_linux_tutorial_: boolean indicating whether the file is part of the linux tutorial
+    :param main_title: the main title of the file, to be used in the reference link
+    :param last_title: the most recently encountered title
     :return:
     """
     if len(OS) > 0:
@@ -429,7 +392,8 @@ def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_):
         linux_part = ""
 
     # finally add the reference link
-    add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-'))
+    with open(file_location, 'a') as write_file:
+        write_file.write("\nreference: docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-') + "\n")
 
 
 def make_valid_title(title):
@@ -459,7 +423,7 @@ def main():
     main function
     :return:
     """
-    global main_title, active_OS_if_states, last_directory, root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, is_linux_tutorial, in_code_block, last_title, curr_dirs, links_generic, links_linux, links_windows, links_macos
+    global root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, links_generic, links_linux, links_windows, links_macos
     # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason
     remove_directory_tree("parsed_mds")
     remove_directory_tree("copies")
@@ -558,9 +522,10 @@ def main():
             with open(copy_file, 'r') as readfile:
 
                 for line in readfile:
-                    title_level, title, directory = check_for_title(line)
+                    title_level, title, directory, curr_dirs = check_for_title(line, main_title, last_directory, last_title, curr_dirs, is_linux_tutorial, in_code_block)
 
-                    detect_in_code_block(line)
+                    if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
+                        in_code_block = not in_code_block
 
                     # line is a title with a maximum depth of 4
                     if title_level > 0:
@@ -572,25 +537,24 @@ def main():
                     # line is not a title
                     elif after_first_title:
                         # check for if-statements and write the appropriate lines in the right files
-                        next_action = check_if_statements(line)
+                        next_action = check_if_statements(line, active_OS_if_states)
                         while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message":
                             if next_action[0] == "write_text_and_check_extra_message":
-                                choose_and_write_to_file(next_action[2])
-                            next_action = check_if_statements(next_action[1])
+                                choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title)
+                            next_action = check_if_statements(next_action[1], active_OS_if_states)
 
                         if next_action[0] == "write_text":
-                            choose_and_write_to_file(next_action[2])
+                            choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title)
 
             # write end of file for the last file
             write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic,
-                              is_linux_tutorial)
+                              is_linux_tutorial, main_title, last_title)
             write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux",
-                              links_linux, is_linux_tutorial)
+                              links_linux, is_linux_tutorial, main_title, last_title)
             write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"),
-                              "Windows",
-                              links_windows, is_linux_tutorial)
+                              "Windows", links_windows, is_linux_tutorial, main_title, last_title)
             write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS",
-                              links_macos, is_linux_tutorial)
+                              links_macos, is_linux_tutorial, main_title, last_title)
 
     remove_directory_tree("copies")
     remove_directory_tree("if_mangled_files")

From 508b22c7b3a485f8fdb64059a45d2ee9dfdd4f04 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 13 Aug 2024 16:30:48 +0200
Subject: [PATCH 024/152] fixed some issues with if statements

---
 .../chatbot_parser.py                         | 31 +++++++++++++------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index e7a88e4de7c..bbab687bd39 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -12,7 +12,7 @@
 ################### define global variables ###################
 
 # variable that keeps track of the source directories
-source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"), os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")]
+source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC")]#, os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")]
 
 # list of all the filenames
 filenames_generic = {}
@@ -189,7 +189,7 @@ def mangle_os_ifs(line, is_os):
 
         constr_match = re.search(r'\{%.*?%}', match.string)
         if_match = re.search(r'if ', match.group(1))
-        if_os_match = re.search(r'if OS == ', match.group(1))
+        if_os_match = re.search(r'if OS ', match.group(1))
         endif_match = re.search(r'endif', match.group(1))
         pos_first_mangle = constr_match.start() + start_index + added_length + 1
         pos_second_mangle = constr_match.end() + start_index + added_length - 1
@@ -228,18 +228,18 @@ def mangle_os_ifs(line, is_os):
     return line, is_os
 
 
-def mangle_ifs(directory, file):
+def mangle_ifs(directory, filename):
     """
     function that writes the if-mangled version of a file to a location where the jinja parser will use it
 
     :param directory: the directory of the file to be if mangled
-    :param file: the filename of the file to be mangled
+    :param filename: the filename of the file to be mangled
     :return:
     """
     # variable to keep track of latest if-statement scope
     is_os = 0  # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if}
 
-    with open(os.path.join("if_mangled_files",  file), 'w') as write_file:
+    with open(os.path.join("if_mangled_files",  filename), 'w') as write_file:
         with open(directory, 'r') as read_file:
             for line in read_file:
                 new_line, is_os = mangle_os_ifs(line, is_os)
@@ -261,17 +261,17 @@ def check_if_statements(curr_line, active_OS_if_states):
     :return: the text to be written to the file, if any
     """
     # check whether the first part of the line contains information wrt if-statements
-    match = re.search(r'^\{-if-%([^%]*)%-if-}(.*)', curr_line)
+    match = re.search(r'^\{-if-%(.*?)%-if-}(.*)', curr_line)
 
     # check whether the line contains information wrt if-statements that is not in its first part
-    match_large = re.search(r'^(.*)(\{-if-%[^%]*%-if-})(.*)', curr_line)
+    match_large = re.search(r'^(.*)(\{-if-%.*?%-if-})(.*)', curr_line)
 
     if match:
         content = match.group(1)
 
         # new if-statement wrt OS
         if re.search(r'if OS == ', content):
-            OS = content[9:-1]
+            OS = content.split()[-1]
 
             # set new active OS
             active_OS_if_states[OS] = "active"
@@ -281,6 +281,17 @@ def check_if_statements(curr_line, active_OS_if_states):
                 if other_OS != OS and active_OS_if_states[other_OS] == "active":
                     active_OS_if_states[other_OS] = "inactive"
 
+        elif re.search(r'if OS != ', content):
+            OS = content.split()[-1]
+
+            # the OS named in the != condition becomes inactive
+            active_OS_if_states[OS] = "inactive"
+
+            # set the other OSes that are currently inactive to active
+            for other_OS in active_OS_if_states.keys():
+                if other_OS != OS and active_OS_if_states[other_OS] == "inactive":
+                    active_OS_if_states[other_OS] = "active"
+
         # endif statement wrt OS
         elif re.search(r'endif', content):
             if str(1) in active_OS_if_states.values():
@@ -556,8 +567,8 @@ def main():
             write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS",
                               links_macos, is_linux_tutorial, main_title, last_title)
 
-    remove_directory_tree("copies")
-    remove_directory_tree("if_mangled_files")
+    # remove_directory_tree("copies")
+    # remove_directory_tree("if_mangled_files")
 
 
 main()

From a25ce2dc8bf7ffb9f732b6eb7e796ad54cd724b5 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 13 Aug 2024 16:36:04 +0200
Subject: [PATCH 025/152] fixed some issues with if statements

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index bbab687bd39..fb7d8a8b176 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -12,7 +12,7 @@
 ################### define global variables ###################
 
 # variable that keeps track of the source directories
-source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC")]#, os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")]
+source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"), os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")]
 
 # list of all the filenames
 filenames_generic = {}

From 80d0535a74564ecd18f626d5c18568cc17c7d7fc Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 13 Aug 2024 17:03:07 +0200
Subject: [PATCH 026/152] got rid of all global variables

---
 .../chatbot_parser.py                         | 125 +++++++++---------
 1 file changed, 65 insertions(+), 60 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index fb7d8a8b176..eb30cb7fb6a 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -5,30 +5,6 @@
 import yaml
 from jinja2 import FileSystemLoader, Environment, ChoiceLoader
 
-# variables for analytics
-succeeded = 0
-failed = 0
-
-################### define global variables ###################
-
-# variable that keeps track of the source directories
-source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"), os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")]
-
-# list of all the filenames
-filenames_generic = {}
-filenames_linux = {}
-for source_directory in source_directories:
-    all_items = os.listdir(source_directory)
-    files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
-    for file in files:
-        if "linux-tutorial" in source_directory:
-            filenames_linux[file] = os.path.join(source_directory, file)
-        else:
-            filenames_generic[file] = os.path.join(source_directory, file)
-
-# some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc
-problem_files = ["getting_started.md", "navigating.md"]
-
 
 ################### define functions ###################
 def remove_directory_tree(old_directory):
@@ -42,7 +18,7 @@ def remove_directory_tree(old_directory):
         shutil.rmtree(old_directory)
 
 
-def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs, is_linux_tutorial_, in_code_block_):
+def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs, root_dirs, link_lists, is_linux_tutorial_, in_code_block_):
     """
     function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables
 
@@ -51,13 +27,15 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs
     :param last_directory: the most recently encountered directory
     :param last_title: the most recently encountered title
     :param curr_dirs: the most recent directories at each title level
+    :param root_dirs: a list containing the root directories
+    :param link_lists: a list containing all four link_lists with the links that will be printed at the bottom of a file
     :param is_linux_tutorial_: boolean to indicate whether the current file is part of the linux tutorial
     :param in_code_block_: boolean to indicate whether the current line is part of a codeblock
     :return: the depth of the title
     :return: the title found in the line if any
     :return: the new directory in which the next file will be written
+    :return link_lists: updated link_lists
     """
-    global links_generic, links_linux, links_windows, links_macos
 
     # detect titles
     match = re.match(r'^#+ ', curr_line)
@@ -68,35 +46,35 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs
 
     # make necessary changes if a title has been detected
     if logic_output == 0 or in_code_block_:
-        return 0, None, None, curr_dirs
+        return 0, None, None, curr_dirs, link_lists
     else:
         if last_title is not None:
-            write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, is_linux_tutorial_, main_title, last_title)
-            write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux",
-                              links_linux, is_linux_tutorial_, main_title, last_title)
-            write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), "Windows",
-                              links_windows, is_linux_tutorial_, main_title, last_title)
-            write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS",
-                              links_macos, is_linux_tutorial_, main_title, last_title)
+            write_end_of_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), "", link_lists[0], is_linux_tutorial_, main_title, last_title)
+            write_end_of_file(os.path.join(root_dirs[1], last_directory, last_title + ".txt"), "Linux",
+                              link_lists[1], is_linux_tutorial_, main_title, last_title)
+            write_end_of_file(os.path.join(root_dirs[2], last_directory, last_title + ".txt"), "Windows",
+                              link_lists[2], is_linux_tutorial_, main_title, last_title)
+            write_end_of_file(os.path.join(root_dirs[3], last_directory, last_title + ".txt"), "macOS",
+                              link_lists[3], is_linux_tutorial_, main_title, last_title)
 
             # reset the link lists
-            links_generic = []
-            links_linux = []
-            links_windows = []
-            links_macos = []
+            link_lists[0] = []
+            link_lists[1] = []
+            link_lists[2] = []
+            link_lists[3] = []
 
         curr_dirs[logic_output] = os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')))
 
-        create_directory(os.path.join(root_dir_generic,  curr_dirs[logic_output]))
-        create_directory(os.path.join(root_dir_os_specific_linux,  curr_dirs[logic_output]))
-        create_directory(os.path.join(root_dir_os_specific_windows,  curr_dirs[logic_output]))
-        create_directory(os.path.join(root_dir_os_specific_macos,  curr_dirs[logic_output]))
+        create_directory(os.path.join(root_dirs[0],  curr_dirs[logic_output]))
+        create_directory(os.path.join(root_dirs[1],  curr_dirs[logic_output]))
+        create_directory(os.path.join(root_dirs[2],  curr_dirs[logic_output]))
+        create_directory(os.path.join(root_dirs[3],  curr_dirs[logic_output]))
 
         # update the lower order current directories
         for i in range(logic_output + 1, 4):
             curr_dirs[i] = curr_dirs[logic_output]
 
-        return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs
+        return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs, link_lists
 
 
 def create_directory(new_directory):
@@ -333,28 +311,30 @@ def check_if_statements(curr_line, active_OS_if_states):
         return "write_text", None, curr_line
 
 
-def write_text_to_file(file_name, curr_line):
+def write_text_to_file(file_name, curr_line, link_lists):
     """
     function that writes a line to a file
 
     :param file_name: target file to write the line to
     :param curr_line: line to be written to the file
-    :return:
+    :param link_lists: list containing all the links that will be printed at the end of files
+    :return link_lists: updated link_lists
     """
-    global links_generic, links_linux, links_windows, links_macos
     with open(file_name, "a") as write_file:
         if "generic" in file_name:
-            curr_line, links_generic = replace_markdown_markers(curr_line, links_generic)
+            curr_line, links_generic = replace_markdown_markers(curr_line, link_lists[0])
         elif "linux" in file_name:
-            curr_line, links_linux = replace_markdown_markers(curr_line, links_linux)
+            curr_line, links_linux = replace_markdown_markers(curr_line, link_lists[1])
         elif "windows" in file_name:
-            curr_line, links_windows = replace_markdown_markers(curr_line, links_windows)
+            curr_line, links_windows = replace_markdown_markers(curr_line, link_lists[2])
         else:
-            curr_line, links_macos = replace_markdown_markers(curr_line, links_macos)
+            curr_line, links_macos = replace_markdown_markers(curr_line, link_lists[3])
         write_file.write(curr_line)
 
+    return link_lists
 
-def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, last_title):
+
+def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, last_title, root_dirs, link_lists):
     """
     function that decides what file to write text to
 
@@ -362,18 +342,22 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las
     :param active_OS_if_states: dictionary keeping track of which OSes are active according to the if-statements
     :param last_directory: most recently made directory
     :param last_title: the most recently encountered title
-    :return:
+    :param root_dirs: a list with all root directories
+    :param link_lists: list of links that need to be written at the end of the files
+    :return link_lists: an updated link_lists
     """
     # check that the line is part of the website for gent
     if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and \
             active_OS_if_states["macos"] == "inactive":
-        write_text_to_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), curr_line)
+        link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), curr_line, link_lists)
     if active_OS_if_states["linux"] == "active":
-        write_text_to_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), curr_line)
+        link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".txt"), curr_line, link_lists)
     if active_OS_if_states["windows"] == "active":
-        write_text_to_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), curr_line)
+        link_lists = write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".txt"), curr_line, link_lists)
     if active_OS_if_states["macos"] == "active":
-        write_text_to_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), curr_line)
+        link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".txt"), curr_line, link_lists)
+
+    return link_lists
 
 
 def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_, main_title, last_title):
@@ -434,7 +418,6 @@ def main():
     main function
     :return:
     """
-    global root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, links_generic, links_linux, links_windows, links_macos
     # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason
     remove_directory_tree("parsed_mds")
     remove_directory_tree("copies")
@@ -453,6 +436,27 @@ def main():
     if not os.path.exists("if_mangled_files"):
         os.mkdir("if_mangled_files")
 
+    ################### define loop-invariant variables ###################
+
+    # variable that keeps track of the source directories
+    source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"),
+                          os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")]
+
+    # list of all the filenames
+    filenames_generic = {}
+    filenames_linux = {}
+    for source_directory in source_directories:
+        all_items = os.listdir(source_directory)
+        files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
+        for file in files:
+            if "linux-tutorial" in source_directory:
+                filenames_linux[file] = os.path.join(source_directory, file)
+            else:
+                filenames_generic[file] = os.path.join(source_directory, file)
+
+    # some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc (temporary, should be taken out when the original files have been converted to proper markdown)
+    problem_files = ["getting_started.md", "navigating.md"]
+
     for filenames in [filenames_generic, filenames_linux]:
         for filename in filenames.keys():
             ################### define/reset loop specific variables ###################
@@ -499,6 +503,7 @@ def main():
             links_linux = []
             links_windows = []
             links_macos = []
+            link_lists = [links_generic, links_linux, links_windows, links_macos]
 
             # dictionaries to keep track of current OS
             active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"}
@@ -533,7 +538,7 @@ def main():
             with open(copy_file, 'r') as readfile:
 
                 for line in readfile:
-                    title_level, title, directory, curr_dirs = check_for_title(line, main_title, last_directory, last_title, curr_dirs, is_linux_tutorial, in_code_block)
+                    title_level, title, directory, curr_dirs, link_lists = check_for_title(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block)
 
                     if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
                         in_code_block = not in_code_block
@@ -551,11 +556,11 @@ def main():
                         next_action = check_if_statements(line, active_OS_if_states)
                         while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message":
                             if next_action[0] == "write_text_and_check_extra_message":
-                                choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title)
+                                link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists)
                             next_action = check_if_statements(next_action[1], active_OS_if_states)
 
                         if next_action[0] == "write_text":
-                            choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title)
+                            link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists)
 
             # write end of file for the last file
             write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic,

From 9163a759c3d06cc7eb10185b10f177baa86f2294 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 14 Aug 2024 09:45:27 +0200
Subject: [PATCH 027/152] small changes to make file more readable

---
 .../chatbot_parser.py                         | 51 ++++++++-----------
 1 file changed, 21 insertions(+), 30 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index eb30cb7fb6a..cb3ed26fda8 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -48,29 +48,23 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs
     if logic_output == 0 or in_code_block_:
         return 0, None, None, curr_dirs, link_lists
     else:
+
+        # if a new title is detected, write the end of the previous file
         if last_title is not None:
-            write_end_of_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), "", link_lists[0], is_linux_tutorial_, main_title, last_title)
-            write_end_of_file(os.path.join(root_dirs[1], last_directory, last_title + ".txt"), "Linux",
-                              link_lists[1], is_linux_tutorial_, main_title, last_title)
-            write_end_of_file(os.path.join(root_dirs[2], last_directory, last_title + ".txt"), "Windows",
-                              link_lists[2], is_linux_tutorial_, main_title, last_title)
-            write_end_of_file(os.path.join(root_dirs[3], last_directory, last_title + ".txt"), "macOS",
-                              link_lists[3], is_linux_tutorial_, main_title, last_title)
-
-            # reset the link lists
-            link_lists[0] = []
-            link_lists[1] = []
-            link_lists[2] = []
-            link_lists[3] = []
+            for i, OS in enumerate(["", "Linux", "Windows", "macOS"]):
+                write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".txt"), OS, link_lists[i], is_linux_tutorial_, main_title, last_title)
+
+            # reset the link lists for each OS
+            for i in range(4):
+                link_lists[i] = []
 
+        # make a new directory corresponding with the new title
         curr_dirs[logic_output] = os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')))
 
-        create_directory(os.path.join(root_dirs[0],  curr_dirs[logic_output]))
-        create_directory(os.path.join(root_dirs[1],  curr_dirs[logic_output]))
-        create_directory(os.path.join(root_dirs[2],  curr_dirs[logic_output]))
-        create_directory(os.path.join(root_dirs[3],  curr_dirs[logic_output]))
+        for i in range(4):
+            create_directory(os.path.join(root_dirs[i],  curr_dirs[logic_output]))
 
-        # update the lower order current directories
+        # update the higher order current directories
         for i in range(logic_output + 1, 4):
             curr_dirs[i] = curr_dirs[logic_output]
 
@@ -152,6 +146,7 @@ def jinja_parser(filename, copy_location):
 def mangle_os_ifs(line, is_os):
     """
     function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure.
+    We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements.
 
     :param line: the current line to check for os-related if-statements
     :param is_os: boolean keep track of the current os-state of the if-statements. Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if}
@@ -198,7 +193,7 @@ def mangle_os_ifs(line, is_os):
                     is_os = 0
         else:
             if is_os == 2 or is_os == 3:
-                line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[constr_match.end() + start_index + added_length - 1:]
+                line = line[:pos_first_mangle] + "-if-" + line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:]
                 added_length += 8
 
         start_index += constr_match.end()
@@ -247,7 +242,7 @@ def check_if_statements(curr_line, active_OS_if_states):
     if match:
         content = match.group(1)
 
-        # new if-statement wrt OS
+        # new if-statement wrt OS with '=='
         if re.search(r'if OS == ', content):
             OS = content.split()[-1]
 
@@ -259,6 +254,7 @@ def check_if_statements(curr_line, active_OS_if_states):
                 if other_OS != OS and active_OS_if_states[other_OS] == "active":
                     active_OS_if_states[other_OS] = "inactive"
 
+        # new if-statement wrt OS with '!='
         elif re.search(r'if OS != ', content):
             OS = content.split()[-1]
 
@@ -347,8 +343,7 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las
     :return link_lists: an updated link_lists
     """
     # check that the line is part of the website for gent
-    if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and \
-            active_OS_if_states["macos"] == "inactive":
+    if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive":
         link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), curr_line, link_lists)
     if active_OS_if_states["linux"] == "active":
         link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".txt"), curr_line, link_lists)
@@ -457,6 +452,7 @@ def main():
     # some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc (temporary, should be taken out when the original files have been converted to proper markdown)
     problem_files = ["getting_started.md", "navigating.md"]
 
+    # for loops over all files
     for filenames in [filenames_generic, filenames_linux]:
         for filename in filenames.keys():
             ################### define/reset loop specific variables ###################
@@ -540,6 +536,7 @@ def main():
                 for line in readfile:
                     title_level, title, directory, curr_dirs, link_lists = check_for_title(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block)
 
+                    # detect codeblocks to make sure titles aren't detected in them
                     if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
                         in_code_block = not in_code_block
 
@@ -563,14 +560,8 @@ def main():
                             link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists)
 
             # write end of file for the last file
-            write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic,
-                              is_linux_tutorial, main_title, last_title)
-            write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux",
-                              links_linux, is_linux_tutorial, main_title, last_title)
-            write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"),
-                              "Windows", links_windows, is_linux_tutorial, main_title, last_title)
-            write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS",
-                              links_macos, is_linux_tutorial, main_title, last_title)
+            for OS in ["", "Linux", "Windows", "macOS"]:
+                write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), OS, links_generic, is_linux_tutorial, main_title, last_title)
 
     # remove_directory_tree("copies")
     # remove_directory_tree("if_mangled_files")

From 1dcffc1bac4ee341556ef29c3557bb21686eaf2d Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 14 Aug 2024 11:45:16 +0200
Subject: [PATCH 028/152] codeblocks, tips, warnings and info reformatted

---
 .../chatbot_parser.py                         | 39 +++++++++++++++++--
 1 file changed, 35 insertions(+), 4 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index cb3ed26fda8..d8d8000bdcc 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -98,10 +98,37 @@ def replace_markdown_markers(curr_line, linklist):
             curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + str(len(linklist) + 1) + "]")
             linklist.append(match[1])
 
-    # TODO: code-blocks
-    # TODO: tips
-    # TODO: warnings
-    # etc
+    # codeblock (with ``` -> always stands on a separate line, so line can be dropped)
+    if '```' in curr_line:
+        curr_line = ""
+
+    # structures within <>
+    match = re.findall(r'<(.*?)>', curr_line)
+    if match:
+        for i, content in enumerate(match):
+            exception_words = ['SEQUENCE', 'vsc40000', 'Session', 'OUTPUT_DIR', 'jobname', 'jobid', 'hostname', 'Enjoy the day!', 'stdout', 'stderr', 'coursecode', 'year', 'nickname', '01', 'number of ', 'user', 'home', 'software', 'module']
+            if '#include' in curr_line:
+                pass
+            elif '.' in content:
+                curr_line = re.sub(f'<{content}>', f"{content}", curr_line)
+            elif '***' in content:
+                curr_line = re.sub(r'<\*\*\*', "", re.sub(r'\*\*\*\\>', "", curr_line))
+            elif '-' in content and ' ' not in content:
+                curr_line = re.sub(f'<{content}>', f"{content}", curr_line)
+            # sometimes normal words are between <> brackets and should be excluded (ugly fix)
+            elif any(substring in content for substring in exception_words):
+                pass
+            # special cases that messed up the formatting (ugly fix)
+            elif ' files</b' in content:
+                curr_line = re.sub(r'</b>', "", curr_line)
+            elif '<>' in curr_line:
+                pass
+            else:
+                curr_line = re.sub(r'<.*?>', "", curr_line)
+
+    # structures with !!! (info, tips, warnings)
+    if '!!!' in curr_line:
+        curr_line = re.sub(r'!!!', "", curr_line)
 
     return curr_line, linklist
 
@@ -327,6 +354,9 @@ def write_text_to_file(file_name, curr_line, link_lists):
             curr_line, links_macos = replace_markdown_markers(curr_line, link_lists[3])
         write_file.write(curr_line)
 
+        # if re.search(r'<.*?>', curr_line):
+        #     print(curr_line)
+
     return link_lists
 
 
@@ -567,6 +597,7 @@ def main():
     # remove_directory_tree("if_mangled_files")
 
 
+print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
 main()
 # TODO: reconsider maximum depth to be detected as title (now at four)
 # TODO: adapt script to be used from command line

From 4d7fbdb193e14fc8d93dd914748409aff0d2170c Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 14 Aug 2024 11:48:59 +0200
Subject: [PATCH 029/152] small optimisations

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index d8d8000bdcc..91198ea777e 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -28,7 +28,7 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs
     :param last_title: the most recently encountered title
     :param curr_dirs: the most recent directories at each title level
     :param root_dirs: a list containing the root directories
-    param link_lists: a list containing all four link_lists with the links that will be printed at the bottom of a file
+    :param link_lists: a list containing all four link_lists with the links that will be printed at the bottom of a file
     :param is_linux_tutorial_: boolean to indicate whether the current file is part of the linux tutorial
     :param in_code_block_: boolean to indicate whether the current line is part of a codeblock
     :return: the depth of the title
@@ -427,7 +427,7 @@ def make_valid_title(title):
     invalid_chars = r'[<>:"/\\|?*\0()]'
 
     # get rid of extra information between {} brackets
-    s = re.sub(r'\{.*?}', '', title)
+    title = re.sub(r'\{.*?}', '', title)
 
     # Remove invalid characters
     valid_filename = re.sub(invalid_chars, '', title)
@@ -513,16 +513,11 @@ def main():
             main_title = filename[:-3]
 
             # variable that keeps track of the directories that are used to write in at different levels
-            curr_dirs = [filename[:-3] for i in range(5)]
-
-            # variable to keep track whether we're dealing with OS-specific info or not
-            OS_specific = False
+            curr_dirs = [filename[:-3] for _ in range(5)]
 
             # variable that keeps track of the latest non-zero level title and corresponding directory
-            last_title_level = 1
             last_title = None
             last_directory = None
-            last_was_title = False
 
             # list to keep track of links in the text
             links_generic = []
@@ -572,7 +567,6 @@ def main():
 
                     # line is a title with a maximum depth of 4
                     if title_level > 0:
-                        last_title_level = title_level
                         last_title = title
                         last_directory = directory
                         after_first_title = True

From 671f7f3b5e57a2643e87a65a8b449e068176261d Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 14 Aug 2024 11:50:18 +0200
Subject: [PATCH 030/152] small optimisations

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 91198ea777e..70ba8b17ee5 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -354,9 +354,6 @@ def write_text_to_file(file_name, curr_line, link_lists):
             curr_line, links_macos = replace_markdown_markers(curr_line, link_lists[3])
         write_file.write(curr_line)
 
-        # if re.search(r'<.*?>', curr_line):
-        #     print(curr_line)
-
     return link_lists
 
 
@@ -587,8 +584,8 @@ def main():
             for OS in ["", "Linux", "Windows", "macOS"]:
                 write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), OS, links_generic, is_linux_tutorial, main_title, last_title)
 
-    # remove_directory_tree("copies")
-    # remove_directory_tree("if_mangled_files")
+    remove_directory_tree("copies")
+    remove_directory_tree("if_mangled_files")
 
 
 print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")

From e5c39bd2dd5f7e708b802f193656c20dfaa41253 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 14 Aug 2024 12:04:39 +0200
Subject: [PATCH 031/152] initial commit

---
 scripts/HPC_chatbot_preprocessor/README.md | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 scripts/HPC_chatbot_preprocessor/README.md

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
new file mode 100644
index 00000000000..32ec81c2fa5
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -0,0 +1,16 @@
+# Chatbot parser
+
+`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory for a chatbot to be trained on. 
+
+## Generated file structure
+
+This directory structure is written as a subdirectory of `parsed_mds`. In `parsed_mds`, two subdirectories can be found: 
+
+- `generic` contains the parts of the markdown sources that were non-OS-specific
+- `os_specific` contains the parts of the markdown sources that were OS-specific
+
+Within `os_specific`, a further distinction is made for each of the three possible operating systems included in the documentation.
+
+These subdirectories then contain a subdirectory for each individual markdown sourcefile. In the file specific subdirectories, further divisions are made according to the titles and subtitles found in that markdown sourcefile. 
+
+Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and at the end a reference link to the corresponding part of the documentation website on <docs.hpc.ugent.be>.
\ No newline at end of file

From c6492fc14120e9391507a8363bcd9a82976766e8 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 14 Aug 2024 13:03:46 +0200
Subject: [PATCH 032/152] added requirements

---
 scripts/HPC_chatbot_preprocessor/requirements.txt | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 scripts/HPC_chatbot_preprocessor/requirements.txt

diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt
new file mode 100644
index 00000000000..19ed8a2a29d
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/requirements.txt
@@ -0,0 +1,6 @@
+os
+re
+shutil
+pypandoc
+yaml
+jinja2
\ No newline at end of file

From aff8198d90ed64b044e837fd672c0019b88520d8 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 14 Aug 2024 13:04:08 +0200
Subject: [PATCH 033/152] added requirements and usage info

---
 scripts/HPC_chatbot_preprocessor/README.md | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index 32ec81c2fa5..e1e12046dd5 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -13,4 +13,17 @@ Withing `os_specific` a further distinction is made for each of the three possib
 
 These subdirectories then contain a subdirectory for each individual markdown sourcefile. In the file specific subdirectories, further divisions are made according to the titles and subtitles found in that markdown sourcefile. 
 
-Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and at the end a reference link to the corresponding part of the documentation website on <docs.hpc.ugent.be>.
\ No newline at end of file
+Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and at the end a reference link to the corresponding part of the documentation website on <docs.hpc.ugent.be>.
+
+## Requirements
+
+- The required Python packages are listed in `requirements.txt`
+- [Pandoc](https://pandoc.org/installing.html) must be installed and must be added to the system PATH
+
+## Usage
+
+The script can be run in a shell environment with the following command:
+
+```shell
+python chatbot_parser.py
+```
\ No newline at end of file

From a981002d1cd8eab50a69d860838084b768f538e8 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 14 Aug 2024 13:04:27 +0200
Subject: [PATCH 034/152] minor changes to the print statements

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 70ba8b17ee5..6cb74a5c9be 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -590,5 +590,4 @@ def main():
 
 print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
 main()
-# TODO: reconsider maximum depth to be detected as title (now at four)
-# TODO: adapt script to be used from command line
+print("Parsing finished successfully")

From 1f3b3432fdba5390befbfd2109fa1b698c5b0728 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 16 Aug 2024 10:53:10 +0200
Subject: [PATCH 035/152] reworked function to take care of html structures

---
 .../chatbot_parser.py                         | 56 +++++++++++++------
 1 file changed, 38 insertions(+), 18 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 6cb74a5c9be..d5e950973ec 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -4,6 +4,7 @@
 import pypandoc
 import yaml
 from jinja2 import FileSystemLoader, Environment, ChoiceLoader
+from itertools import chain
 
 
 ################### define functions ###################
@@ -91,6 +92,13 @@ def replace_markdown_markers(curr_line, linklist):
     :return curr_line: the adapted current line
     :return linklist: the updated linklist
     """
+
+    # TODO: filter out images before links
+    # replace images with an empty line
+    if re.match(r'!\[image]\(.*?\)', curr_line) or re.match(r'!\[]\(img/.*?.png\)', curr_line):
+        print(curr_line)
+        curr_line = ""
+
     # replace links with a reference
     matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line)
     if matches:
@@ -106,25 +114,36 @@ def replace_markdown_markers(curr_line, linklist):
     match = re.findall(r'<(.*?)>', curr_line)
     if match:
         for i, content in enumerate(match):
-            exception_words = ['SEQUENCE', 'vsc40000', 'Session', 'OUTPUT_DIR', 'jobname', 'jobid', 'hostname', 'Enjoy the day!', 'stdout', 'stderr', 'coursecode', 'year', 'nickname', '01', 'number of ', 'user', 'home', 'software', 'module']
-            if '#include' in curr_line:
-                pass
-            elif '.' in content:
-                curr_line = re.sub(f'<{content}>', f"{content}", curr_line)
-            elif '***' in content:
-                curr_line = re.sub(r'<\*\*\*', "", re.sub(r'\*\*\*\\>', "", curr_line))
-            elif '-' in content and ' ' not in content:
-                curr_line = re.sub(f'<{content}>', f"{content}", curr_line)
-            # sometimes normal words are between <> brackets and should be excluded (ugly fix)
-            elif any(substring in content for substring in exception_words):
-                pass
-            # special cases that messed up the formatting (ugly fix)
+            syntax_words = ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"]  # make sure these are always lowercase
+            syntax_words_variations = list(chain.from_iterable([[element, element + "/", "/" + element] for element in syntax_words]))
+            syntax_words_style = [element + " style=.*" for element in syntax_words]
+
+            # add references for every link of format <a href=...>
+            if re.search(r'a href=.*', content):
+                link = content[8:-1]
+                curr_line = re.sub(f'<{content}>', "[" + str(len(linklist) + 1) + "]", curr_line)
+                linklist.append(link)
+
+            # drop the syntax words
+            elif content.lower() in syntax_words_variations:
+                curr_line = re.sub(f'<{content}>', "", curr_line)
+
+            # drop the version of the syntax_words followed by " style="
+            elif any(re.match(pattern, content) for pattern in syntax_words_style):
+                curr_line = re.sub(r'<.*?>', "", curr_line)
+
+            # drop markdown comments
+            elif re.fullmatch(r'!--.*?--', content):
+                curr_line = re.sub(r'<.*?>', "", curr_line)
+
+            # special case (ugly fix)
             elif ' files</b' in content:
                 curr_line = re.sub(r'</b>', "", curr_line)
-            elif '<>' in curr_line:
-                pass
+
+            # keep the rest
             else:
-                curr_line = re.sub(r'<.*?>', "", curr_line)
+                # print("<" + content + ">")
+                pass
 
     # structures with !!! (info, tips, warnings)
     if '!!!' in curr_line:
@@ -505,6 +524,7 @@ def main():
                 root_dir_os_specific_linux = os.path.join("parsed_mds", "os_specific", "linux")
                 root_dir_os_specific_windows = os.path.join("parsed_mds", "os_specific", "windows")
                 root_dir_os_specific_macos = os.path.join("parsed_mds", "os_specific", "macos")
+            root_dirs = [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos]
 
             # variable for the main title (needed for reference links)
             main_title = filename[:-3]
@@ -581,8 +601,8 @@ def main():
                             link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists)
 
             # write end of file for the last file
-            for OS in ["", "Linux", "Windows", "macOS"]:
-                write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), OS, links_generic, is_linux_tutorial, main_title, last_title)
+            for i, OS in enumerate(["", "Linux", "Windows", "macOS"]):
+                write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".txt"), OS, link_lists[i], is_linux_tutorial, main_title, last_title)
 
     remove_directory_tree("copies")
     remove_directory_tree("if_mangled_files")

From 48cad9779f0ed2a492027330b5af531cf0631079 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 16 Aug 2024 11:26:43 +0200
Subject: [PATCH 036/152] filter out images

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index d5e950973ec..2408557fd49 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -93,10 +93,8 @@ def replace_markdown_markers(curr_line, linklist):
     :return linklist: the updated linklist
     """
 
-    # TODO: filter out images before links
     # replace images with an empty line
-    if re.match(r'!\[image]\(.*?\)', curr_line) or re.match(r'!\[]\(img/.*?.png\)', curr_line):
-        print(curr_line)
+    if re.search(r'(?i)!\[image]\(.*?\)', curr_line) or re.search(r'!\[]\(img/.*?.png\)', curr_line):
         curr_line = ""
 
     # replace links with a reference
@@ -495,9 +493,6 @@ def main():
             else:
                 filenames_generic[file] = os.path.join(source_directory, file)
 
-    # some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc (temporary, should be taken out when the original files have been converted to proper markdown)
-    problem_files = ["getting_started.md", "navigating.md"]
-
     # for loops over all files
     for filenames in [filenames_generic, filenames_linux]:
         for filename in filenames.keys():
@@ -568,10 +563,6 @@ def main():
             # process the jinja macros
             jinja_parser(filename, copy_file)
 
-            # convert the files without proper markdown layout into markdown using pandoc
-            if "linux-tutorial" in filenames[filename] and filename in problem_files:
-                pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file)
-
             # open the file and store line by line in the right file
             with open(copy_file, 'r') as readfile:
 

From df58f233e125078552318647815054bdfdff0bcb Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 16 Aug 2024 13:28:28 +0200
Subject: [PATCH 037/152] get rid of backquotes, asterisks, pluses and
 underscores used for formatting

---
 .../chatbot_parser.py                         | 53 ++++++++++++++-----
 1 file changed, 39 insertions(+), 14 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 2408557fd49..72d8c251c55 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -1,7 +1,6 @@
 import os
 import re
 import shutil
-import pypandoc
 import yaml
 from jinja2 import FileSystemLoader, Environment, ChoiceLoader
 from itertools import chain
@@ -83,12 +82,13 @@ def create_directory(new_directory):
         os.mkdir(new_directory)
 
 
-def replace_markdown_markers(curr_line, linklist):
+def replace_markdown_markers(curr_line, linklist, in_code_block):
     """
     function that replaces certain markdown structures with the equivalent used on the website
 
     :param curr_line: the current line on which markdown structures need to be replaced
     :param linklist: the list used to store links that need to be printed at the end of the file
+    :param in_code_block: boolean indicating whether the current line is part of a code block
     :return curr_line: the adapted current line
     :return linklist: the updated linklist
     """
@@ -147,6 +147,29 @@ def replace_markdown_markers(curr_line, linklist):
     if '!!!' in curr_line:
         curr_line = re.sub(r'!!!', "", curr_line)
 
+    # get rid of other markdown indicators (`, *, +, _)
+    if not in_code_block:
+
+        backquotes = re.findall(r'`(.*?)`', curr_line)
+        if backquotes:
+            for i, content in enumerate(backquotes):
+                curr_line = curr_line.replace(f"`{content}`", content)
+
+        asterisks = re.findall(r'(?<!\\)(\*+)(.+?)\1', curr_line)
+        if asterisks:
+            for i, content in enumerate(asterisks):
+                curr_line = re.sub(r"(\*+)" + content[1] + r"\1", content[1], curr_line)
+
+        pluses = re.findall(r'\+\+(.+?)\+\+', curr_line)
+        if pluses:
+            for i, content in enumerate(pluses):
+                curr_line = re.sub(r"\+\+" + content + r"\+\+", content, curr_line)
+
+        underscores = re.findall(r' (_+)(.+?)\1 ', curr_line)
+        if underscores:
+            for i, content in enumerate(underscores):
+                curr_line = re.sub(r"(_+)" + content[1] + r"\1", content[1], curr_line)
+
     return curr_line, linklist
 
 
@@ -351,30 +374,31 @@ def check_if_statements(curr_line, active_OS_if_states):
         return "write_text", None, curr_line
 
 
-def write_text_to_file(file_name, curr_line, link_lists):
+def write_text_to_file(file_name, curr_line, link_lists, in_code_block):
     """
     function that writes a line to a file
 
     :param file_name: target file to write the line to
     :param curr_line: line to be written to the file
     :param link_lists: list containing all the links that will be printed at the end of files
+    :param in_code_block: boolean indicating whether the current line is in a codeblock
     :return link_lists: updated link_lists
     """
     with open(file_name, "a") as write_file:
         if "generic" in file_name:
-            curr_line, links_generic = replace_markdown_markers(curr_line, link_lists[0])
+            curr_line, links_generic = replace_markdown_markers(curr_line, link_lists[0], in_code_block)
         elif "linux" in file_name:
-            curr_line, links_linux = replace_markdown_markers(curr_line, link_lists[1])
+            curr_line, links_linux = replace_markdown_markers(curr_line, link_lists[1], in_code_block)
         elif "windows" in file_name:
-            curr_line, links_windows = replace_markdown_markers(curr_line, link_lists[2])
+            curr_line, links_windows = replace_markdown_markers(curr_line, link_lists[2], in_code_block)
         else:
-            curr_line, links_macos = replace_markdown_markers(curr_line, link_lists[3])
+            curr_line, links_macos = replace_markdown_markers(curr_line, link_lists[3], in_code_block)
         write_file.write(curr_line)
 
     return link_lists
 
 
-def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, last_title, root_dirs, link_lists):
+def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, last_title, root_dirs, link_lists, in_code_block):
     """
     function that decides what file to write text to
 
@@ -384,17 +408,18 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las
     :param last_title: the most recently encountered title
     :param root_dirs: a list with all root directories
     :param link_lists: list of links that need to be written at the end of the files
+    :param in_code_block: boolean indicating whether the current line is in a code block
     :return link_lists: an updated link_lists
     """
     # check that the line is part of the website for gent
     if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive":
-        link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), curr_line, link_lists)
+        link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), curr_line, link_lists, in_code_block)
     if active_OS_if_states["linux"] == "active":
-        link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".txt"), curr_line, link_lists)
+        link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".txt"), curr_line, link_lists, in_code_block)
     if active_OS_if_states["windows"] == "active":
-        link_lists = write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".txt"), curr_line, link_lists)
+        link_lists = write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".txt"), curr_line, link_lists, in_code_block)
     if active_OS_if_states["macos"] == "active":
-        link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".txt"), curr_line, link_lists)
+        link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".txt"), curr_line, link_lists, in_code_block)
 
     return link_lists
 
@@ -585,11 +610,11 @@ def main():
                         next_action = check_if_statements(line, active_OS_if_states)
                         while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message":
                             if next_action[0] == "write_text_and_check_extra_message":
-                                link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists)
+                                link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block)
                             next_action = check_if_statements(next_action[1], active_OS_if_states)
 
                         if next_action[0] == "write_text":
-                            link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists)
+                            link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block)
 
             # write end of file for the last file
             for i, OS in enumerate(["", "Linux", "Windows", "macOS"]):

From c423e0782a8374103076decb965404d992eb23f0 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 16 Aug 2024 14:22:01 +0200
Subject: [PATCH 038/152] dump to json files instead of txt files

---
 .../chatbot_parser.py                         | 78 ++++++++++++-------
 1 file changed, 50 insertions(+), 28 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 72d8c251c55..b6061ef9d90 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -1,9 +1,10 @@
+import json
 import os
 import re
 import shutil
 import yaml
-from jinja2 import FileSystemLoader, Environment, ChoiceLoader
 from itertools import chain
+from jinja2 import FileSystemLoader, Environment, ChoiceLoader
 
 
 ################### define functions ###################
@@ -52,7 +53,7 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs
         # if a new title is detected, write the end of the previous file
         if last_title is not None:
             for i, OS in enumerate(["", "Linux", "Windows", "macOS"]):
-                write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".txt"), OS, link_lists[i], is_linux_tutorial_, main_title, last_title)
+                write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial_, main_title, last_title)
 
             # reset the link lists for each OS
             for i in range(4):
@@ -384,16 +385,30 @@ def write_text_to_file(file_name, curr_line, link_lists, in_code_block):
     :param in_code_block: boolean indicating whether the current line is in a codeblock
     :return link_lists: updated link_lists
     """
-    with open(file_name, "a") as write_file:
+
+    if os.path.exists(file_name) or curr_line.strip():
+        if os.path.exists(file_name):
+            with open(file_name, "r") as read_file:
+                data = json.load(read_file)
+        else:
+            data = {}
+
         if "generic" in file_name:
-            curr_line, links_generic = replace_markdown_markers(curr_line, link_lists[0], in_code_block)
+            curr_line, link_lists[0] = replace_markdown_markers(curr_line, link_lists[0], in_code_block)
         elif "linux" in file_name:
-            curr_line, links_linux = replace_markdown_markers(curr_line, link_lists[1], in_code_block)
+            curr_line, link_lists[1] = replace_markdown_markers(curr_line, link_lists[1], in_code_block)
         elif "windows" in file_name:
-            curr_line, links_windows = replace_markdown_markers(curr_line, link_lists[2], in_code_block)
+            curr_line, link_lists[2] = replace_markdown_markers(curr_line, link_lists[2], in_code_block)
+        else:
+            curr_line, link_lists[3] = replace_markdown_markers(curr_line, link_lists[3], in_code_block)
+
+        if 'content' in data:
+            data['content'] += curr_line
         else:
-            curr_line, links_macos = replace_markdown_markers(curr_line, link_lists[3], in_code_block)
-        write_file.write(curr_line)
+            data['content'] = curr_line
+
+        with open(file_name, "w") as write_file:
+            json.dump(data, write_file, indent=4)
 
     return link_lists
 
@@ -413,13 +428,13 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las
     """
     # check that the line is part of the website for gent
     if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive":
-        link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), curr_line, link_lists, in_code_block)
+        link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block)
     if active_OS_if_states["linux"] == "active":
-        link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".txt"), curr_line, link_lists, in_code_block)
+        link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block)
     if active_OS_if_states["windows"] == "active":
-        link_lists = write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".txt"), curr_line, link_lists, in_code_block)
+        link_lists = write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block)
     if active_OS_if_states["macos"] == "active":
-        link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".txt"), curr_line, link_lists, in_code_block)
+        link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block)
 
     return link_lists
 
@@ -436,23 +451,30 @@ def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_, main_titl
     :param last_title: the most recently encountered title
     :return:
     """
-    if len(OS) > 0:
-        OS = OS + "/"
 
-    # add the links from within the document
-    with open(file_location, 'a') as write_file:
-        write_file.write("\n\n")
+    if os.path.exists(file_location):
+
+        if len(OS) > 0:
+            OS = OS + "/"
+
+        with open(file_location, "r") as read_file:
+            data = json.load(read_file)
+
+        # add the links from within the document
+        data['links'] = {}
         for i, link in enumerate(linklist):
-            write_file.write("[" + str(i + 1) + "]: " + str(link) + "\n")
+            data['links'][str(i + 1)] = str(link)
 
-    if is_linux_tutorial_:
-        linux_part = "linux-tutorial/"
-    else:
-        linux_part = ""
+        if is_linux_tutorial_:
+            linux_part = "linux-tutorial/"
+        else:
+            linux_part = ""
+
+        # add the reference link
+        data['reference_link'] = ("docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-'))
 
-    # finally add the reference link
-    with open(file_location, 'a') as write_file:
-        write_file.write("\nreference: docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-') + "\n")
+        with open(file_location, 'w') as write_file:
+            json.dump(data, write_file, indent=4)
 
 
 def make_valid_title(title):
@@ -618,10 +640,10 @@ def main():
 
             # write end of file for the last file
             for i, OS in enumerate(["", "Linux", "Windows", "macOS"]):
-                write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".txt"), OS, link_lists[i], is_linux_tutorial, main_title, last_title)
+                write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title)
 
-    remove_directory_tree("copies")
-    remove_directory_tree("if_mangled_files")
+    # remove_directory_tree("copies")
+    # remove_directory_tree("if_mangled_files")
 
 
 print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")

From 2c333fea2e36229a6db8fd7d85ce906ae0479c8c Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 16 Aug 2024 15:44:07 +0200
Subject: [PATCH 039/152] cleaned up parser with macros

---
 .../chatbot_parser.py                         | 284 ++++++++++--------
 1 file changed, 158 insertions(+), 126 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index b6061ef9d90..b36f5c3c471 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -6,19 +6,55 @@
 from itertools import chain
 from jinja2 import FileSystemLoader, Environment, ChoiceLoader
 
-
-################### define functions ###################
-def remove_directory_tree(old_directory):
-    """
-    function that removes a full directory tree
-
-    :param old_directory: the directory to be removed
-    :return:
-    """
-    if os.path.exists(old_directory):
-        shutil.rmtree(old_directory)
+#################### define macro's ####################
+# directories
+PARSED_MDS = "parsed_mds"
+COPIES = "copies"
+IF_MANGLED_FILES = "if_mangled_files"
+LINUX_TUTORIAL = "linux-tutorial"
+RETURN_DIR = ".."
+MKDOCS_DIR = "mkdocs"
+DOCS_DIR = "docs"
+HPC_DIR = "HPC"
+EXTRA_DIR = "extra"
+GENERIC_DIR = "generic"
+OS_SPECIFIC_DIR = "os_specific"
+
+# OSes
+LINUX = "linux"
+WINDOWS = "windows"
+MACOS = "macos"
+
+# urls
+REPO_URL = 'https://github.com/hpcugent/vsc_user_docs'
+DOCS_URL = "docs.hpc.ugent.be"
+
+# OS-related if-states
+ACTIVE = "active"
+INACTIVE = "inactive"
+
+# if mangler states
+NON_OS_IF = 0
+NON_OS_IF_IN_OS_IF = 1
+OS_IF = 2
+OS_IF_IN_OS_IF = 3
+
+# if mangler macros
+IF_MANGLED_PART = "-if-"
+
+# actions
+DONE = "done"
+WRITE_TEXT = "write_text"
+CHECK_EXTRA_MESSAGE = "check_extra_message"
+WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE = "write_text_and_check_extra_message"
+
+# JSON attributes
+CONTENT = "content"
+LINKS = "links"
+REFERENCE_LINK = "reference_link"
 
 
+################### define functions ###################
 def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs, root_dirs, link_lists, is_linux_tutorial_, in_code_block_):
     """
     function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables
@@ -63,7 +99,7 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs
         curr_dirs[logic_output] = os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')))
 
         for i in range(4):
-            create_directory(os.path.join(root_dirs[i],  curr_dirs[logic_output]))
+            os.makedirs(os.path.join(root_dirs[i],  curr_dirs[logic_output]), exist_ok=True)
 
         # update the higher order current directories
         for i in range(logic_output + 1, 4):
@@ -72,17 +108,6 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs
         return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs, link_lists
 
 
-def create_directory(new_directory):
-    """
-    function that creates new directories
-
-    :param new_directory: directory to be created
-    :return:
-    """
-    if not os.path.exists(new_directory):
-        os.mkdir(new_directory)
-
-
 def replace_markdown_markers(curr_line, linklist, in_code_block):
     """
     function that replaces certain markdown structures with the equivalent used on the website
@@ -183,16 +208,16 @@ def jinja_parser(filename, copy_location):
     :return:
     """
     # YAML file location
-    yml_file_path = os.path.join('..', '..', 'mkdocs', 'extra', 'gent.yml')
+    yml_file_path = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, EXTRA_DIR, 'gent.yml')
 
     # Read the YAML file
     with open(yml_file_path, 'r') as yml_file:
         words_dict = yaml.safe_load(yml_file)
 
-    # ugly fix for index.md error
+    # ugly fix for index.md error that occurs because of the macro "config.repo_url" in mkdocs/docs/HPC/index.md
     additional_context = {
         'config': {
-            'repo_url': 'https://github.com/hpcugent/vsc_user_docs'
+            'repo_url': REPO_URL
         }
     }
     combined_context = {**words_dict, **additional_context}
@@ -201,7 +226,7 @@ def jinja_parser(filename, copy_location):
     mangle_ifs(copy_location, filename)
 
     # Use Jinja2 to replace the macros
-    template_loader = ChoiceLoader([FileSystemLoader(searchpath='if_mangled_files'), FileSystemLoader(searchpath=os.path.join("..", "..", "mkdocs", "docs", "HPC"))])
+    template_loader = ChoiceLoader([FileSystemLoader(searchpath=IF_MANGLED_FILES), FileSystemLoader(searchpath=os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR))])
     templateEnv = Environment(loader=template_loader)
     template = templateEnv.get_template(filename)
     rendered_content = template.render(combined_context)
@@ -217,7 +242,11 @@ def mangle_os_ifs(line, is_os):
     We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements.
 
     :param line: the current line to check for os-related if-statements
-    :param is_os: boolean keep track of the current os-state of the if-statements. Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if}
+    :param is_os: variable that keeps track of the current os-state of the if-statements. Can be NON_OS_IF, NON_OS_IF_IN_OS_IF, OS_IF or OS_IF_IN_OS_IF
+        NON_OS_IF: not in an os-if
+        NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if
+        OS_IF: in an os-if
+        OS_IF_IN_OS_IF: in an os-if nested in an os-if
     :return line: the modified line with  mangled os-related if-statements
     """
 
@@ -232,37 +261,46 @@ def mangle_os_ifs(line, is_os):
         if_match = re.search(r'if ', match.group(1))
         if_os_match = re.search(r'if OS ', match.group(1))
         endif_match = re.search(r'endif', match.group(1))
+
+        # mangle positions
         pos_first_mangle = constr_match.start() + start_index + added_length + 1
         pos_second_mangle = constr_match.end() + start_index + added_length - 1
 
+        # different parts of the original string
+        PART_BEFORE_MANGLING = line[:pos_first_mangle]
+        PART_BETWEEN_MANGLING = line[pos_first_mangle:pos_second_mangle]
+        PART_AFTER_MANGLING = line[pos_second_mangle:]
+
         # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these
         if endif_match:
-            if is_os == 2 or is_os == 3:
-                line = line[:pos_first_mangle] + "-if-" + line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:]
-                added_length += 8
-                if is_os == 2:
-                    is_os = 0
-                elif is_os == 3:
-                    is_os = 2
-            elif is_os == 1:
-                is_os = 2
+            if is_os == OS_IF or is_os == OS_IF_IN_OS_IF:
+                line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING
+                added_length += 2 * len(IF_MANGLED_PART)
+                if is_os == OS_IF:
+                    is_os = NON_OS_IF
+                elif is_os == OS_IF_IN_OS_IF:
+                    is_os = OS_IF
+            elif is_os == NON_OS_IF_IN_OS_IF:
+                is_os = OS_IF
+                
         elif if_match:
             if if_os_match:
-                line = line[:pos_first_mangle] + "-if-" + line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:]
-                added_length += 8
-                if is_os == 2:
-                    is_os = 3
+                line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING
+                added_length += 2 * len(IF_MANGLED_PART)
+                if is_os == OS_IF:
+                    is_os = OS_IF_IN_OS_IF
                 else:
-                    is_os = 2
+                    is_os = OS_IF
             else:
-                if is_os == 2:
-                    is_os = 1
+                if is_os == OS_IF:
+                    is_os = NON_OS_IF_IN_OS_IF
                 else:
-                    is_os = 0
+                    is_os = NON_OS_IF
+                    
         else:
-            if is_os == 2 or is_os == 3:
-                line = line[:pos_first_mangle] + "-if-" + line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:]
-                added_length += 8
+            if is_os == OS_IF or is_os == OS_IF_IN_OS_IF:
+                line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING
+                added_length += 2 * len(IF_MANGLED_PART)
 
         start_index += constr_match.end()
         match = re.search(r'\{%(.*?)%}(.*)', match.group(2))
@@ -278,9 +316,9 @@ def mangle_ifs(directory, filename):
     :return:
     """
     # variable to keep track of latest if-statement scope
-    is_os = 0  # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if}
+    is_os = NON_OS_IF
 
-    with open(os.path.join("if_mangled_files",  filename), 'w') as write_file:
+    with open(os.path.join(IF_MANGLED_FILES,  filename), 'w') as write_file:
         with open(directory, 'r') as read_file:
             for line in read_file:
                 new_line, is_os = mangle_os_ifs(line, is_os)
@@ -294,18 +332,18 @@ def check_if_statements(curr_line, active_OS_if_states):
     :param curr_line: the line to be checked for if-statements to build the directory structure
     :param active_OS_if_states: dictionary keeping track of the active OS states according to the if-statements
     :return: the next action to be done with the line:
-                "done": An if-statement has been found at the start of the line, the active os list has been updated, processing of the current line is finished and a following line can be processed.
-                "check_extra_message": An if-statement has been found at the start of the line, the active os list has been updated, more text has been detected after the if-statement that also needs to be checked.
-                "write_text": No if-statement has been found, write the current line to a file (can also be part of the current line)
-                "write_text_and_check_extra_message": An if statement has been found not at the start of the line. Firstly, write the text up until the if-statement to a file, then check the rest of the line.
+                DONE: An if-statement has been found at the start of the line, the active os list has been updated, processing of the current line is finished and a following line can be processed.
+                CHECK_EXTRA_MESSAGE: An if-statement has been found at the start of the line, the active os list has been updated, more text has been detected after the if-statement that also needs to be checked.
+                WRITE_TEXT: No if-statement has been found, write the current line to a file (can also be part of the current line)
+                WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE: An if statement has been found not at the start of the line. Firstly, write the text up until the if-statement to a file, then check the rest of the line.
     :return: the extra message to be checked, if any
     :return: the text to be written to the file, if any
     """
     # check whether the first part of the line contains information wrt if-statements
-    match = re.search(r'^\{-if-%(.*?)%-if-}(.*)', curr_line)
+    match = re.search(r'^\{' + IF_MANGLED_PART + '%(.*?)%' + IF_MANGLED_PART + '}(.*)', curr_line)
 
     # check whether the line contains information wrt if-statements that is not in its first part
-    match_large = re.search(r'^(.*)(\{-if-%.*?%-if-})(.*)', curr_line)
+    match_large = re.search(r'^(.*)(\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '})(.*)', curr_line)
 
     if match:
         content = match.group(1)
@@ -315,33 +353,33 @@ def check_if_statements(curr_line, active_OS_if_states):
             OS = content.split()[-1]
 
             # set new active OS
-            active_OS_if_states[OS] = "active"
+            active_OS_if_states[OS] = ACTIVE
 
             # set other active ones on inactive
             for other_OS in active_OS_if_states.keys():
-                if other_OS != OS and active_OS_if_states[other_OS] == "active":
-                    active_OS_if_states[other_OS] = "inactive"
+                if other_OS != OS and active_OS_if_states[other_OS] == ACTIVE:
+                    active_OS_if_states[other_OS] = INACTIVE
 
         # new if-statement wrt OS with '!='
         elif re.search(r'if OS != ', content):
             OS = content.split()[-1]
 
             # set new active OS
-            active_OS_if_states[OS] = "inactive"
+            active_OS_if_states[OS] = INACTIVE
 
             # set other inactive ones on active
             for other_OS in active_OS_if_states.keys():
-                if other_OS != OS and active_OS_if_states[other_OS] == "inactive":
-                    active_OS_if_states[other_OS] = "active"
+                if other_OS != OS and active_OS_if_states[other_OS] == INACTIVE:
+                    active_OS_if_states[other_OS] = ACTIVE
 
         # endif statement wrt OS
         elif re.search(r'endif', content):
             if str(1) in active_OS_if_states.values():
                 active_OS_if_states[
-                    list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active"
+                    list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = ACTIVE
             else:
                 for key in active_OS_if_states.keys():
-                    active_OS_if_states[key] = "inactive"
+                    active_OS_if_states[key] = INACTIVE
 
         # else statement wrt OS
         elif re.search(r'else', content):
@@ -353,26 +391,26 @@ def check_if_statements(curr_line, active_OS_if_states):
 
             # set the previously active one on inactive until the next endif
             key_list = list(active_OS_if_states.keys())
-            position = list(active_OS_if_states.values()).index("active")
+            position = list(active_OS_if_states.values()).index(ACTIVE)
             active_OS_if_states[key_list[position]] = str(i)
 
             # set inactive ones on active
-            while "inactive" in active_OS_if_states.values():
-                position = list(active_OS_if_states.values()).index("inactive")
-                active_OS_if_states[key_list[position]] = "active"
+            while INACTIVE in active_OS_if_states.values():
+                position = list(active_OS_if_states.values()).index(INACTIVE)
+                active_OS_if_states[key_list[position]] = ACTIVE
 
         if len(match.group(2)) != 0:
             extra_message = match.group(2).lstrip()
-            return "check_extra_message", extra_message, None
+            return CHECK_EXTRA_MESSAGE, extra_message, None
 
         else:
-            return "done", None, None
+            return DONE, None, None
 
     elif match_large:
-        return "write_text_and_check_extra_message", match_large.group(2), match_large.group(1)
+        return WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE, match_large.group(2), match_large.group(1)
 
     else:
-        return "write_text", None, curr_line
+        return WRITE_TEXT, None, curr_line
 
 
 def write_text_to_file(file_name, curr_line, link_lists, in_code_block):
@@ -393,19 +431,19 @@ def write_text_to_file(file_name, curr_line, link_lists, in_code_block):
         else:
             data = {}
 
-        if "generic" in file_name:
+        if GENERIC_DIR in file_name:
             curr_line, link_lists[0] = replace_markdown_markers(curr_line, link_lists[0], in_code_block)
-        elif "linux" in file_name:
+        elif LINUX in file_name:
             curr_line, link_lists[1] = replace_markdown_markers(curr_line, link_lists[1], in_code_block)
-        elif "windows" in file_name:
+        elif WINDOWS in file_name:
             curr_line, link_lists[2] = replace_markdown_markers(curr_line, link_lists[2], in_code_block)
         else:
             curr_line, link_lists[3] = replace_markdown_markers(curr_line, link_lists[3], in_code_block)
 
-        if 'content' in data:
-            data['content'] += curr_line
+        if CONTENT in data:
+            data[CONTENT] += curr_line
         else:
-            data['content'] = curr_line
+            data[CONTENT] = curr_line
 
         with open(file_name, "w") as write_file:
             json.dump(data, write_file, indent=4)
@@ -427,13 +465,13 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las
     :return link_lists: an updated link_lists
     """
     # check that the line is part of the website for gent
-    if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive":
+    if active_OS_if_states[LINUX] == INACTIVE and active_OS_if_states[WINDOWS] == INACTIVE and active_OS_if_states[MACOS] == INACTIVE:
         link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block)
-    if active_OS_if_states["linux"] == "active":
+    if active_OS_if_states[LINUX] == ACTIVE:
         link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block)
-    if active_OS_if_states["windows"] == "active":
+    if active_OS_if_states[WINDOWS] == ACTIVE:
         link_lists = write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block)
-    if active_OS_if_states["macos"] == "active":
+    if active_OS_if_states[MACOS] == ACTIVE:
         link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block)
 
     return link_lists
@@ -461,17 +499,17 @@ def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_, main_titl
             data = json.load(read_file)
 
         # add the links from within the document
-        data['links'] = {}
+        data[LINKS] = {}
         for i, link in enumerate(linklist):
-            data['links'][str(i + 1)] = str(link)
+            data[LINKS][str(i + 1)] = str(link)
 
         if is_linux_tutorial_:
-            linux_part = "linux-tutorial/"
+            linux_part = LINUX_TUTORIAL + "/"
         else:
             linux_part = ""
 
         # add the reference link
-        data['reference_link'] = ("docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-'))
+        data[REFERENCE_LINK] = (DOCS_URL + "/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-'))
 
         with open(file_location, 'w') as write_file:
             json.dump(data, write_file, indent=4)
@@ -505,28 +543,28 @@ def main():
     :return:
     """
     # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason
-    remove_directory_tree("parsed_mds")
-    remove_directory_tree("copies")
-    remove_directory_tree("if_mangled_files")
+    shutil.rmtree(PARSED_MDS)
+    shutil.rmtree(COPIES)
+    shutil.rmtree(IF_MANGLED_FILES)
 
     # make the necessary directories
-    if not os.path.exists("copies"):
-        os.mkdir("copies")
+    if not os.path.exists(COPIES):
+        os.mkdir(COPIES)
 
-    if not os.path.exists(os.path.join("copies", "linux")):
-        os.mkdir(os.path.join("copies", "linux"))
+    if not os.path.exists(os.path.join(COPIES, LINUX_TUTORIAL)):
+        os.mkdir(os.path.join(COPIES, LINUX_TUTORIAL))
 
-    if not os.path.exists("parsed_mds"):
-        os.mkdir("parsed_mds")
+    if not os.path.exists(PARSED_MDS):
+        os.mkdir(PARSED_MDS)
 
-    if not os.path.exists("if_mangled_files"):
-        os.mkdir("if_mangled_files")
+    if not os.path.exists(IF_MANGLED_FILES):
+        os.mkdir(IF_MANGLED_FILES)
 
     ################### define loop-invariant variables ###################
 
     # variable that keeps track of the source directories
-    source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"),
-                          os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")]
+    source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR),
+                          os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)]
 
     # list of all the filenames
     filenames_generic = {}
@@ -535,7 +573,7 @@ def main():
         all_items = os.listdir(source_directory)
         files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
         for file in files:
-            if "linux-tutorial" in source_directory:
+            if LINUX_TUTORIAL in source_directory:
                 filenames_linux[file] = os.path.join(source_directory, file)
             else:
                 filenames_generic[file] = os.path.join(source_directory, file)
@@ -546,26 +584,26 @@ def main():
             ################### define/reset loop specific variables ###################
 
             # variable that keeps track of whether file is part of the linux tutorial
-            is_linux_tutorial = bool("linux-tutorial" in filenames[filename])
+            is_linux_tutorial = bool(LINUX_TUTORIAL in filenames[filename])
 
             # make a copy of the original file in order to make sure the original does not get altered
             if is_linux_tutorial:
-                copy_file = os.path.join("copies", "linux",  filename)
+                copy_file = os.path.join(COPIES, LINUX_TUTORIAL,  filename)
             else:
-                copy_file = os.path.join("copies", filename)
+                copy_file = os.path.join(COPIES, filename)
             shutil.copyfile(filenames[filename], copy_file)
 
             # variable that keeps track of the directories that are used to write in at different levels
             if is_linux_tutorial:
-                root_dir_generic = os.path.join("parsed_mds", "generic", "linux_tutorial")
-                root_dir_os_specific_linux = os.path.join("parsed_mds", "os_specific", "linux", "linux_tutorial")
-                root_dir_os_specific_windows = os.path.join("parsed_mds", "os_specific", "windows", "linux_tutorial")
-                root_dir_os_specific_macos = os.path.join("parsed_mds", "os_specific", "macos", "linux_tutorial")
+                root_dir_generic = os.path.join(PARSED_MDS, GENERIC_DIR, LINUX_TUTORIAL)
+                root_dir_os_specific_linux = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, LINUX, LINUX_TUTORIAL)
+                root_dir_os_specific_windows = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS, LINUX_TUTORIAL)
+                root_dir_os_specific_macos = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, MACOS, LINUX_TUTORIAL)
             else:
-                root_dir_generic = os.path.join("parsed_mds", "generic")
-                root_dir_os_specific_linux = os.path.join("parsed_mds", "os_specific", "linux")
-                root_dir_os_specific_windows = os.path.join("parsed_mds", "os_specific", "windows")
-                root_dir_os_specific_macos = os.path.join("parsed_mds", "os_specific", "macos")
+                root_dir_generic = os.path.join(PARSED_MDS, GENERIC_DIR)
+                root_dir_os_specific_linux = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, LINUX)
+                root_dir_os_specific_windows = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS)
+                root_dir_os_specific_macos = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, MACOS)
             root_dirs = [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos]
 
             # variable for the main title (needed for reference links)
@@ -586,7 +624,7 @@ def main():
             link_lists = [links_generic, links_linux, links_windows, links_macos]
 
             # dictionaries to keep track of current OS
-            active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"}
+            active_OS_if_states = {LINUX: INACTIVE, WINDOWS: INACTIVE, MACOS: INACTIVE}
 
             # variable that shows whether the first title has been reached yet
             after_first_title = False
@@ -597,15 +635,8 @@ def main():
             ################### actually parse the md file ###################
 
             # create directories for the source markdown file
-            create_directory(root_dir_generic)
-            create_directory(os.path.join("parsed_mds", "os_specific"))
-            create_directory(root_dir_os_specific_linux)
-            create_directory(root_dir_os_specific_windows)
-            create_directory(root_dir_os_specific_macos)
-            create_directory(os.path.join(root_dir_generic, curr_dirs[0]))
-            create_directory(os.path.join(root_dir_os_specific_linux, curr_dirs[0]))
-            create_directory(os.path.join(root_dir_os_specific_windows, curr_dirs[0]))
-            create_directory(os.path.join(root_dir_os_specific_macos, curr_dirs[0]))
+            for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]:
+                os.makedirs(directory, exist_ok=True)
 
             # process the jinja macros
             jinja_parser(filename, copy_file)
@@ -630,22 +661,23 @@ def main():
                     elif after_first_title:
                         # check for if-statements and write the appropriate lines in the right files
                         next_action = check_if_statements(line, active_OS_if_states)
-                        while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message":
-                            if next_action[0] == "write_text_and_check_extra_message":
+                        while next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE or next_action[0] == CHECK_EXTRA_MESSAGE:
+                            if next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE:
                                 link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block)
                             next_action = check_if_statements(next_action[1], active_OS_if_states)
 
-                        if next_action[0] == "write_text":
+                        if next_action[0] == WRITE_TEXT:
                             link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block)
 
             # write end of file for the last file
             for i, OS in enumerate(["", "Linux", "Windows", "macOS"]):
                 write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title)
 
-    # remove_directory_tree("copies")
-    # remove_directory_tree("if_mangled_files")
+    # remove_directory_tree(COPIES)
+    # remove_directory_tree(IF_MANGLED_FILES)
 
 
+################### run the script ###################
 print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
 main()
 print("Parsing finished successfully")

From ce5235250b0e99e4a60fd03ab150b838c5e4d82e Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 16 Aug 2024 15:47:55 +0200
Subject: [PATCH 040/152] cleaned up parser with macros

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index b36f5c3c471..b6833632267 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -273,7 +273,7 @@ def mangle_os_ifs(line, is_os):
 
         # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these
         if endif_match:
-            if is_os == OS_IF or is_os == OS_IF_IN_OS_IF:
+            if is_os in (OS_IF, OS_IF_IN_OS_IF):
                 line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING
                 added_length += 2 * len(IF_MANGLED_PART)
                 if is_os == OS_IF:
@@ -298,7 +298,7 @@ def mangle_os_ifs(line, is_os):
                     is_os = NON_OS_IF
                     
         else:
-            if is_os == OS_IF or is_os == OS_IF_IN_OS_IF:
+            if is_os in (OS_IF, OS_IF_IN_OS_IF):
                 line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING
                 added_length += 2 * len(IF_MANGLED_PART)
 

From 5db34afdb1a8f8dc4439daaefbd97c9204caa3d9 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 16 Aug 2024 15:52:29 +0200
Subject: [PATCH 041/152] cleaned up parser with macros

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index b6833632267..a2abc77b798 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -246,7 +246,7 @@ def mangle_os_ifs(line, is_os):
         NON_OS_IF: not in an os-if
         NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if
         OS_IF: in an os-if
-        OS_IF_IN_OS_IF: in an os-if nested in an os-if}
+        OS_IF_IN_OS_IF: in an os-if nested in an os-if
     :return line: the modified line with  mangled os-related if-statements
     """
 

From 4226d28ddd50e91acb6988317d42de0f881eaea4 Mon Sep 17 00:00:00 2001
From: EwDa291 <100782488+EwDa291@users.noreply.github.com>
Date: Mon, 19 Aug 2024 11:17:11 +0200
Subject: [PATCH 042/152] Update README.md

---
 scripts/HPC_chatbot_preprocessor/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index e1e12046dd5..5e895d4d62c 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -4,12 +4,12 @@
 
 ## Generated file structure
 
-This directory structure is written as a subdirectory of `parsed_mds`. In `parsed_mds`, two subdirectories can be found: 
+The generated directory structure is written as a subdirectory of `parsed_mds`. In `parsed_mds`, two subdirectories can be found: 
 
 - `generic` contains the parts of the markdown sources that were non-OS-specific
 - `os_specific` contains the parts of the markdown sources that were OS-specific
 
-Withing `os_specific` a further distinction is made for each of the three possible operating systems included in the documentation.
+Within `os_specific` a further distinction is made for each of the three possible operating systems included in the documentation.
 
 These subdirectories then contain a subdirectory for each individual markdown sourcefile. In the file specific subdirectories, further divisions are made according to the titles and subtitles found in that markdown sourcefile. 
 
@@ -26,4 +26,4 @@ The script can be ran in a shell environment with the following command:
 
 ```shell
 python chatbot_parser.py
-```
\ No newline at end of file
+```

From d730a262f667fc00ce637d4ee7e607f201072c2f Mon Sep 17 00:00:00 2001
From: EwDa291 <100782488+EwDa291@users.noreply.github.com>
Date: Mon, 19 Aug 2024 12:37:51 +0200
Subject: [PATCH 043/152] Update README.md

---
 scripts/HPC_chatbot_preprocessor/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index 5e895d4d62c..6e7d0edc71a 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -1,6 +1,6 @@
 # Chatbot parser
 
-`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory for a chatbot to be trained on. 
+`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory as input for a chatbot. 
 
 ## Generated file structure
 

From f3182e35b769550f9483a4e690b300c8775e494b Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 19 Aug 2024 13:44:46 +0200
Subject: [PATCH 044/152] added section about restrictions on input files

---
 scripts/HPC_chatbot_preprocessor/README.md | 92 ++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index 6e7d0edc71a..23c1d87cc44 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -27,3 +27,95 @@ The script can be ran in a shell environment with the following command:
 ```shell
 python chatbot_parser.py
 ```
+
+## Restrictions on source-files
+
+Due to the nature of the script, some restrictions apply to the markdown files it can use as input.
+
+
+### Nested if structures
+
+The script uses the if-structures in the source-files to split the documentation into general documentation and os-specific documentation. As such it needs to keep track of which types of if-structures (os-related/non-os-related) it is reading from. When using certain nested if-structures, this will cause problems. The supported nested if-structures are determined by the macros `NON_OS_IF`, `NON_OS_IF_IN_OS_IF`, `OS_IF` and `OS_IF_IN_OS_IF`. These correspond, respectively, to a non-os-related if-structure, a non-os-related if-structure nested in an os-related one, an os-related if-structure, and an os-related if-structure nested in another os-related if-structure. All of these may be nested in any number of non-os-related if-structures, but no non-os-related if-structures should be nested in them. It is also not allowed to nest any of the allowed structures in more os-related if-structures.
+
+#### Examples of valid and invalid if-structures
+
+##### Allowed
+
+###### non-os-related in os-related
+
+This is an example of one of the basic allowed if-structures
+
+```
+if OS == windows:
+  if site == Gent:
+    ...
+  endif
+endif
+```
+
+###### os-related in os-related in non-os-related
+
+This is an example of a basic allowed if-structure nested in a non-os-specific if.
+
+```
+if site == Gent:
+  if OS == windows:
+    ...
+  else:
+    if OS == Linux:
+      ...
+    endif
+  endif
+endif
+```
+
+##### Not allowed
+
+###### non-os-related in os-related in os-related
+
+This is an example of a non-os-related if-structure nested in one of the basic allowed if-structures.
+
+```
+if OS != windows:
+  if OS == Linux:
+    if site == Gent:
+      ...
+    endif
+  endif
+endif
+```
+
+This will result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it.
+
+###### os-related in non-os-related in os-related
+
+This is an example of one of the basic allowed if-structures nested in an os-specific if-structure.
+
+```
+if OS != windows:
+  if site == Gent:
+    if OS == Linux:
+      ...
+    endif
+  endif
+endif
+```
+
+This will also result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it.
+
+### Allowed html syntax
+
+The script contains a list of html syntax keywords it filters out. If more html syntax keywords are used in the future, it suffices to add them to this list to adapt the script to filter them out. The current list is:
+```
+["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"]
+```
+The script is also adapted to take into consideration structures like <a href="link"> and retain the link.
+
+### Markdown comments
+
+Any comments within the markdown files (for example TODOs) should use the following syntax:
+
+```
+<!--your comment-->
+```
+and should be limited to one line.

From 675bec5c75d7b0cda95d61867d6a587e7ba13a19 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 19 Aug 2024 13:53:32 +0200
Subject: [PATCH 045/152] adapted section about restrictions on input files

---
 scripts/HPC_chatbot_preprocessor/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index 23c1d87cc44..6899aacf2b0 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -43,7 +43,7 @@ The script uses the if-structures in the source-files to split the documentation
 
 ###### non-os-related in os-related
 
-This is an example of one of the basic allowed if-structures
+This is an example of one of the basic allowed if-structures (`NON_OS_IF_IN_OS_IF`)
 
 ```
 if OS == windows:
@@ -55,7 +55,7 @@ endif
 
 ###### os-related in os-related in non-os-related
 
-This is an example of a basic allowed if-structure nested in a non-os-specific if.
+This is an example of the basic allowed if-structure `OS_IF_IN_OS_IF` nested in a non-os-specific if.
 
 ```
 if site == Gent:

From f1e58ef776a24eb2bb39bed1de1eb0611a0f60eb Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 19 Aug 2024 13:54:12 +0200
Subject: [PATCH 046/152] adapted section about restrictions on input files

---
 scripts/HPC_chatbot_preprocessor/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index 6899aacf2b0..c18a4ebea64 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -73,7 +73,7 @@ endif
 
 ###### non-os-related in os-related in os-related
 
-This is an example of a non-os-related if-structure nested in one of the basic allowed if-structures.
+This is an example of a non-os-related if-structure nested in one of the basic allowed if-structures (`OS_IF_IN_OS_IF`).
 
 ```
 if OS != windows:
@@ -89,7 +89,7 @@ This will result in the parser "forgetting" it opened an os-specific if-statemen
 
 ###### os-related in non-os-related in os-related
 
-This is an example of one of the basic allowed if-structures nested in an os-specific if-structure.
+This is an example of the basic allowed if-structure `OS_IF` (indirectly) nested in an os-specific if-structure.
 
 ```
 if OS != windows:
@@ -109,7 +109,7 @@ The script contains a list of html syntax keywords it filters out. If more html
 ```
 ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"]
 ```
-The script is also adapted to take into consideration structures like <a href="link"> and retain the link.
+The script is also adapted to take into consideration structures like `<a href="link">` and retain the link.
 
 ### Markdown comments
 

From a16850925bdcfad0f3017578082b727244d8b63b Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 19 Aug 2024 14:37:48 +0200
Subject: [PATCH 047/152] change variables to be lowercase

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index a2abc77b798..600c2c08c1e 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -267,14 +267,14 @@ def mangle_os_ifs(line, is_os):
         pos_second_mangle = constr_match.end() + start_index + added_length - 1
 
         # different parts of the original string
-        PART_BEFORE_MANGLING = line[:pos_first_mangle]
-        PART_BETWEEN_MANGLING = line[pos_first_mangle:pos_second_mangle]
-        PART_AFTER_MANGLING = line[pos_second_mangle:]
+        part_before_mangling = line[:pos_first_mangle]
+        part_between_mangling = line[pos_first_mangle:pos_second_mangle]
+        part_after_mangling = line[pos_second_mangle:]
 
         # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these
         if endif_match:
             if is_os in (OS_IF, OS_IF_IN_OS_IF):
-                line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING
+                line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling
                 added_length += 2 * len(IF_MANGLED_PART)
                 if is_os == OS_IF:
                     is_os = NON_OS_IF
@@ -285,7 +285,7 @@ def mangle_os_ifs(line, is_os):
                 
         elif if_match:
             if if_os_match:
-                line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING
+                line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling
                 added_length += 2 * len(IF_MANGLED_PART)
                 if is_os == OS_IF:
                     is_os = OS_IF_IN_OS_IF
@@ -299,7 +299,7 @@ def mangle_os_ifs(line, is_os):
                     
         else:
             if is_os in (OS_IF, OS_IF_IN_OS_IF):
-                line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING
+                line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling
                 added_length += 2 * len(IF_MANGLED_PART)
 
         start_index += constr_match.end()

From 09b86c9fba292b76ddb0c8ecf523e5c73d87c30c Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 19 Aug 2024 14:49:08 +0200
Subject: [PATCH 048/152] take out some copy pasting

---
 .../chatbot_parser.py                         | 24 ++++++++-----------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 600c2c08c1e..7b25c5a9d06 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -431,14 +431,10 @@ def write_text_to_file(file_name, curr_line, link_lists, in_code_block):
         else:
             data = {}
 
-        if GENERIC_DIR in file_name:
-            curr_line, link_lists[0] = replace_markdown_markers(curr_line, link_lists[0], in_code_block)
-        elif LINUX in file_name:
-            curr_line, link_lists[1] = replace_markdown_markers(curr_line, link_lists[1], in_code_block)
-        elif WINDOWS in file_name:
-            curr_line, link_lists[2] = replace_markdown_markers(curr_line, link_lists[2], in_code_block)
-        else:
-            curr_line, link_lists[3] = replace_markdown_markers(curr_line, link_lists[3], in_code_block)
+        os_list = [GENERIC_DIR, LINUX, WINDOWS, MACOS]
+        for i, os_ in enumerate(os_list):
+            if os_ in file_name:
+                curr_line, link_lists[i] = replace_markdown_markers(curr_line, link_lists[i], in_code_block)
 
         if CONTENT in data:
             data[CONTENT] += curr_line
@@ -467,12 +463,12 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las
     # check that the line is part of the website for gent
     if active_OS_if_states[LINUX] == INACTIVE and active_OS_if_states[WINDOWS] == INACTIVE and active_OS_if_states[MACOS] == INACTIVE:
         link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block)
-    if active_OS_if_states[LINUX] == ACTIVE:
-        link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block)
-    if active_OS_if_states[WINDOWS] == ACTIVE:
-        link_lists = write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block)
-    if active_OS_if_states[MACOS] == ACTIVE:
-        link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block)
+    else:
+        os_list = [LINUX, WINDOWS, MACOS]
+        for i, os_ in enumerate(os_list):
+            if active_OS_if_states[os_] == ACTIVE:
+                link_lists = write_text_to_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"),
+                                                curr_line, link_lists, in_code_block)
 
     return link_lists
 

From f95b99e203163e5bf0514a4ae4c4af16d1dd50df Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 19 Aug 2024 15:11:04 +0200
Subject: [PATCH 049/152] added warning about long filepaths

---
 scripts/HPC_chatbot_preprocessor/README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index c18a4ebea64..0b715d85650 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -32,7 +32,6 @@ python chatbot_parser.py
 
 Due to the nature of the script, some restrictions should be taken into account about the markdown files it can use as input.
 
-
 ### Nested if structures
 
 The script uses the if-structures in the source-files to split the documentation into general documentation and os-specific documentation. As such it needs to keep track of which types of if-structures (os-related/non-os-related) it is reading from. When using certain nested if-structures, this will cause problems. The supported nested if-structures are determined by the macros `NON_OS_IF`, `NON_OS_IF_IN_OS_IF`, `OS_IF` and `OS_IF_IN_OS_IF`. So respectively a non-os-related if-structure, a non-os-related if nested in an os-related one, an os-related if-structure and an os-related if-structure nested in another os-related if-structure. All of these are allowed to be nested in an undetermined amount of non-os-related if-structures, but no non-os-related if structures should be nested in them. It is also not allowed to nest any of the allowed structures in more os-related if-structures. 
@@ -119,3 +118,7 @@ Any comments within the markdown files (for example TODO's) should follow the fo
 <!--your comment-->
 ```
  and should be limited to one line.
+
+### Long filenames
+
+Due to the nature of this script, it can generate directories with very long names. Depending on the operating system, this can cause problems with filepaths being to long resulting in files not being able to open. A possible fix for this is to make sure the filepath to the script is not too long.

From 06bb7b9ea18f7cbae70190e1e939eb5952ee09b1 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 19 Aug 2024 15:12:21 +0200
Subject: [PATCH 050/152] fixing typos

---
 scripts/HPC_chatbot_preprocessor/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index 0b715d85650..55996e0bef5 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -119,6 +119,6 @@ Any comments within the markdown files (for example TODO's) should follow the fo
 ```
  and should be limited to one line.
 
-### Long filenames
+### Long filepaths
 
-Due to the nature of this script, it can generate directories with very long names. Depending on the operating system, this can cause problems with filepaths being to long resulting in files not being able to open. A possible fix for this is to make sure the filepath to the script is not too long.
+Due to the nature of this script, it can generate large directories with very long names. Depending on the operating system, this can cause problems with filepaths being too long, resulting in files that cannot be opened. A possible fix for this is to make sure the filepath to the script is not too long.

From 2f3e5b303a8875fe315592f792addba78f4d0e82 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 19 Aug 2024 17:02:30 +0200
Subject: [PATCH 051/152] take out copy pasting

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 7b25c5a9d06..c2fe409b420 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -464,10 +464,9 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las
     if active_OS_if_states[LINUX] == INACTIVE and active_OS_if_states[WINDOWS] == INACTIVE and active_OS_if_states[MACOS] == INACTIVE:
         link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block)
     else:
-        os_list = [LINUX, WINDOWS, MACOS]
-        for i, os_ in enumerate(os_list):
+        for i, os_ in enumerate([LINUX, WINDOWS, MACOS]):
             if active_OS_if_states[os_] == ACTIVE:
-                link_lists = write_text_to_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"),
+                link_lists = write_text_to_file(os.path.join(root_dirs[i + 1], last_directory, last_title + ".json"),
                                                 curr_line, link_lists, in_code_block)
 
     return link_lists

From 0c4dbe8e02639de7787af8109df7781053101d2a Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 20 Aug 2024 14:39:12 +0200
Subject: [PATCH 052/152] first draft version of the restructured script to
 accommodate for the new file format

---
 .../chatbot_parser.py                         | 302 +++++++++++++++---
 .../HPC_chatbot_preprocessor/requirements.txt |   3 +-
 2 files changed, 255 insertions(+), 50 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index c2fe409b420..72aa40292f9 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -4,9 +4,15 @@
 import shutil
 import yaml
 from itertools import chain
-from jinja2 import FileSystemLoader, Environment, ChoiceLoader
+from pathlib import Path
+from jinja2 import FileSystemLoader, Environment, ChoiceLoader, Template
 
 #################### define macro's ####################
+# customizable macros
+MIN_PARAGRAPH_LENGTH = 128
+MAX_TITLE_DEPTH = 4
+INCLUDE_LINKS_IN_PLAINTEXT = True
+
 # directories
 PARSED_MDS = "parsed_mds"
 COPIES = "copies"
@@ -24,10 +30,11 @@
 LINUX = "linux"
 WINDOWS = "windows"
 MACOS = "macos"
+GENERIC = "generic"
 
 # urls
 REPO_URL = 'https://github.com/hpcugent/vsc_user_docs'
-DOCS_URL = "docs.hpc.ugent.be"
+DOCS_URL = "https://docs.hpc.ugent.be"
 
 # OS-related if-states
 ACTIVE = "active"
@@ -76,7 +83,7 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs
 
     # detect titles
     match = re.match(r'^#+ ', curr_line)
-    if match and len(match.group(0)) <= 5:
+    if match and len(match.group(0)) <= MAX_TITLE_DEPTH + 1:
         logic_output = len(match.group(0)) - 1
     else:
         logic_output = 0
@@ -102,19 +109,37 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs
             os.makedirs(os.path.join(root_dirs[i],  curr_dirs[logic_output]), exist_ok=True)
 
         # update the higher order current directories
-        for i in range(logic_output + 1, 4):
+        for i in range(logic_output + 1, MAX_TITLE_DEPTH + 1):
             curr_dirs[i] = curr_dirs[logic_output]
 
         return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs, link_lists
 
 
-def replace_markdown_markers(curr_line, linklist, in_code_block):
+def check_for_title_simple(line, in_code_block, curr_dirs):
+
+    # detect titles
+    match = re.match(r'^#+ ', line)
+    if match and len(match.group(0)) <= 5 and not in_code_block:
+        title_length = len(match.group(0)) - 1
+        curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 1:-1].replace(' ', '-')))
+
+        # update the higher order current directories
+        for i in range(title_length + 1, MAX_TITLE_DEPTH + 1):
+            curr_dirs[i] = curr_dirs[title_length]
+
+        return title_length
+    else:
+        return 0
+
+
+def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
     """
     function that replaces certain markdown structures with the equivalent used on the website
 
     :param curr_line: the current line on which markdown structures need to be replaced
     :param linklist: the list used to store links that need to be printed at the end of the file
     :param in_code_block: boolean indicating whether the current line is part of a code block
+    :param main_title: the main title of the file that is being processed
     :return curr_line: the adapted current line
     :return linklist: the updated linklist
     """
@@ -128,7 +153,13 @@ def replace_markdown_markers(curr_line, linklist, in_code_block):
     if matches:
         for match in matches:
             curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + str(len(linklist) + 1) + "]")
-            linklist.append(match[1])
+            if ".md" not in match[1]:
+                if "#" not in match[1]:
+                    linklist.append(match[1])
+                else:
+                    linklist.append(DOCS_URL + main_title + "/" + match[1])
+            else:
+                linklist.append(DOCS_URL + match[1].replace(".md", "/").replace("index", "").rstrip("/"))
 
     # codeblock (with ``` -> always stands on a separate line, so line can be dropped)
     if '```' in curr_line:
@@ -166,7 +197,6 @@ def replace_markdown_markers(curr_line, linklist, in_code_block):
 
             # keep the rest
             else:
-                # print("<" + content + ">")
                 pass
 
     # structures with !!! (info, tips, warnings)
@@ -199,6 +229,91 @@ def replace_markdown_markers(curr_line, linklist, in_code_block):
     return curr_line, linklist
 
 
+def split_text(file, main_title):
+
+    # start of assuming we haven't encountered a title
+    after_first_title = False
+
+    # start of assuming we are not in a code_block
+    in_code_block = False
+
+    # define initial dictionaries
+    paragraphs_text = {}
+    paragraphs_metadata = {}
+
+    # list to keep track of links in the text
+    link_list = []
+
+    # list to keep track of the order of the subtitles
+    subtitle_order = []
+
+    # variable to keep track of the title level
+    title_level = 0
+
+    # list to keep track of most recent directories on each title level
+    if LINUX_TUTORIAL not in file:
+        curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)]
+    else:
+        curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(MAX_TITLE_DEPTH + 1)]
+
+    with open(file, 'r') as readfile:
+
+        for line in readfile:
+
+            # keep track of title level and directory to write to metadata upon discovering a new subtitle
+            if title_level > 0:
+                last_title_level = title_level
+                last_dir = curr_dirs[last_title_level]
+
+            title_level = check_for_title_simple(line, in_code_block, curr_dirs)
+
+            # detect codeblocks to make sure titles aren't detected in them
+            if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
+                in_code_block = not in_code_block
+
+            # line is a title with a maximum depth of 4
+            if title_level > 0:
+                if after_first_title:
+                    paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir)
+                title = make_valid_title(line[title_level + 1:-1])
+
+                # create an entry for the file in the paragraphs text dictionary
+                paragraphs_text[title] = ""
+
+                after_first_title = True
+                subtitle_order.append(title)
+
+                # reset link_list
+                link_list = []
+
+            # line is not a title
+            elif after_first_title:
+                line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title)
+                if title in paragraphs_text.keys() and line != "\n":
+                    paragraphs_text[title] += line
+                elif line != "\n":
+                    paragraphs_text[title] = line
+
+    # write metadata for the last file
+    paragraphs_metadata[title] = write_metadata(main_title, title, link_list, title_level, curr_dirs[last_title_level])
+
+    return paragraphs_text, paragraphs_metadata, subtitle_order
+
+
+def write_metadata(main_title, subtitle, links, title_level, directory):
+
+    paragraph_metadata = {'main_title': main_title, 'subtitle': subtitle, 'title_depth': title_level, 'directory': directory}
+
+    if len(links) > 0:
+        paragraph_metadata['links'] = {}
+        for i, link in enumerate(links):
+            paragraph_metadata['links'][str(i)] = link
+
+    paragraph_metadata['parent_title'] = Path(directory).parent.name
+
+    return paragraph_metadata
+
+
 def jinja_parser(filename, copy_location):
     """
     function that let's jinja do its thing to format the files except for the os-related if-statements
@@ -434,7 +549,7 @@ def write_text_to_file(file_name, curr_line, link_lists, in_code_block):
         os_list = [GENERIC_DIR, LINUX, WINDOWS, MACOS]
         for i, os_ in enumerate(os_list):
             if os_ in file_name:
-                curr_line, link_lists[i] = replace_markdown_markers(curr_line, link_lists[i], in_code_block)
+                curr_line, link_lists[i] = replace_markdown_markers(curr_line, link_lists[i], in_code_block, "placeholder")
 
         if CONTENT in data:
             data[CONTENT] += curr_line
@@ -532,6 +647,66 @@ def make_valid_title(title):
     return valid_filename
 
 
+def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number):
+
+    # make the directory needed for the files that will be written
+    filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title]["directory"])
+    os.makedirs(filepath)
+
+    write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC)
+
+
+def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number):
+    for i, OS in enumerate([LINUX, WINDOWS, MACOS]):
+
+        # Unmangle if's to use jinja parser
+        paragraphs_text[title] = re.sub(IF_MANGLED_PART, "", paragraphs_text[title])
+
+        # Use jinja to render a different version of the text for each OS
+        template = Template(paragraphs_text[title])
+        text = template.render(OS=OS)
+
+        # define the filepath
+        filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"])
+        os.makedirs(filepath)
+
+        # write the files
+        write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS)
+
+
+def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS):
+    # write text file
+    with open(os.path.join(filepath, paragraphs_metadata[title]["subtitle"] + ".txt"), 'w') as writefile:
+        writefile.write(text)
+
+    # write metadata
+    metadata = paragraphs_metadata[title]
+
+    if title_order_number != 0:
+        metadata["previous_title"] = title_order[title_order_number - 1]
+    else:
+        metadata["previous_title"] = None
+
+    if title_order_number != len(title_order) - 1:
+        metadata["next_title"] = title_order[title_order_number + 1]
+    else:
+        metadata["next_title"] = None
+
+    metadata["OS"] = OS
+
+    if bool(LINUX_TUTORIAL in paragraphs_metadata[title]["directory"]):
+        linux_part = LINUX_TUTORIAL + "/"
+    else:
+        linux_part = ""
+    if OS == GENERIC:
+        os_part = ""
+    else:
+        os_part = OS + "/"
+    metadata["reference_link"] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title]["main_title"] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-')
+
+    with open(os.path.join(filepath, paragraphs_metadata[title]["subtitle"] + "_metadata.json"), 'w') as writefile:
+        json.dump(metadata, writefile, indent=4)
+
 def main():
     """
     main function
@@ -557,21 +732,27 @@ def main():
 
     ################### define loop-invariant variables ###################
 
-    # variable that keeps track of the source directories
-    source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR),
-                          os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)]
-
-    # list of all the filenames
+    # # variable that keeps track of the source directories
+    # source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR),
+    #                       os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)]
+    #
+    # # list of all the filenames
+    # filenames_generic = {}
+    # filenames_linux = {}
+    # for source_directory in source_directories:
+    #     all_items = os.listdir(source_directory)
+    #     files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
+    #     for file in files:
+    #         if LINUX_TUTORIAL in source_directory:
+    #             filenames_linux[file] = os.path.join(source_directory, file)
+    #         else:
+    #             filenames_generic[file] = os.path.join(source_directory, file)
+
+    # Temporary variables to test with just one singular file
     filenames_generic = {}
     filenames_linux = {}
-    for source_directory in source_directories:
-        all_items = os.listdir(source_directory)
-        files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
-        for file in files:
-            if LINUX_TUTORIAL in source_directory:
-                filenames_linux[file] = os.path.join(source_directory, file)
-            else:
-                filenames_generic[file] = os.path.join(source_directory, file)
+    filenames_generic["getting_started.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/getting_started.md"
+    filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md"
 
     # for loops over all files
     for filenames in [filenames_generic, filenames_linux]:
@@ -621,6 +802,10 @@ def main():
             # dictionaries to keep track of current OS
             active_OS_if_states = {LINUX: INACTIVE, WINDOWS: INACTIVE, MACOS: INACTIVE}
 
+            # dictionaries to save the paragraphs and metadata before it is written to files
+            paragraphs_text = {}
+            paragraphs_metadata = {}
+
             # variable that shows whether the first title has been reached yet
             after_first_title = False
 
@@ -636,37 +821,56 @@ def main():
             # process the jinja macros
             jinja_parser(filename, copy_file)
 
-            # open the file and store line by line in the right file
-            with open(copy_file, 'r') as readfile:
-
-                for line in readfile:
-                    title_level, title, directory, curr_dirs, link_lists = check_for_title(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block)
+            # split the text in paragraphs
+            paragraphs_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title)
 
-                    # detect codeblocks to make sure titles aren't detected in them
-                    if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
-                        in_code_block = not in_code_block
+            # for every section, either make the whole section generic, or create an os-specific file for each OS
+            for i, subtitle in enumerate(subtitle_order):
 
-                    # line is a title with a maximum depth of 4
-                    if title_level > 0:
-                        last_title = title
-                        last_directory = directory
-                        after_first_title = True
+                # generic
+                if IF_MANGLED_PART not in paragraphs_text[subtitle]:
+                    write_generic_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i)
 
-                    # line is not a title
-                    elif after_first_title:
-                        # check for if-statements and write the appropriate lines in the right files
-                        next_action = check_if_statements(line, active_OS_if_states)
-                        while next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE or next_action[0] == CHECK_EXTRA_MESSAGE:
-                            if next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE:
-                                link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block)
-                            next_action = check_if_statements(next_action[1], active_OS_if_states)
-
-                        if next_action[0] == WRITE_TEXT:
-                            link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block)
-
-            # write end of file for the last file
-            for i, OS in enumerate(["", "Linux", "Windows", "macOS"]):
-                write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title)
+                # os-specific
+                else:
+                    write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i)
+
+
+            # # open the file and store line by line in the right file
+            # with open(copy_file, 'r') as readfile:
+            #
+            #     for line in readfile:
+            #         title_level, title, directory, curr_dirs, link_lists = check_for_title(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block)
+            #
+            #         # detect codeblocks to make sure titles aren't detected in them
+            #         if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
+            #             in_code_block = not in_code_block
+            #
+            #         # line is a title with a maximum depth of 4
+            #         if title_level > 0:
+            #             last_title = title
+            #             last_directory = directory
+            #             after_first_title = True
+            #
+            #         # line is not a title
+            #         elif after_first_title:
+            #             # check for if-statements and write the appropriate lines in the right files
+            #             next_action = check_if_statements(line, active_OS_if_states)
+            #             while next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE or next_action[0] == CHECK_EXTRA_MESSAGE:
+            #                 if next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE:
+            #                     link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block)
+            #                 next_action = check_if_statements(next_action[1], active_OS_if_states)
+            #
+            #             if next_action[0] == WRITE_TEXT:
+            #                 link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block)
+            #
+            # # write end of file for the last file
+            # for i, OS in enumerate(["", "Linux", "Windows", "macOS"]):
+            #     write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title)
+
+            print(paragraphs_text)
+            print(paragraphs_metadata)
+            print(subtitle_order)
 
     # remove_directory_tree(COPIES)
     # remove_directory_tree(IF_MANGLED_FILES)
diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt
index 19ed8a2a29d..907f08fda77 100644
--- a/scripts/HPC_chatbot_preprocessor/requirements.txt
+++ b/scripts/HPC_chatbot_preprocessor/requirements.txt
@@ -3,4 +3,5 @@ re
 shutil
 pypandoc
 yaml
-jinja2
\ No newline at end of file
+jinja2
+pathlib
\ No newline at end of file

From 38c45723441d13cde3c799f4aa76bff9f4093bfe Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 20 Aug 2024 14:42:28 +0200
Subject: [PATCH 053/152] added support to filter out collapsible admonitions

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 72aa40292f9..4c75df17af0 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -203,6 +203,10 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
     if '!!!' in curr_line:
         curr_line = re.sub(r'!!!', "", curr_line)
 
+    # structures with ??? (collapsable admonitions)
+    if '???' in curr_line:
+        curr_line = re.sub(r'\?\?\?', "", curr_line)
+
     # get rid of other markdown indicators (`, *, +, _)
     if not in_code_block:
 

From 5cbd6533333b0226d812fec08c62b3001ba53ade Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 20 Aug 2024 16:49:51 +0200
Subject: [PATCH 054/152] attempt at fix for problems with jinja include, not
 working yet

---
 .../chatbot_parser.py                         | 51 ++++++++++++++++---
 1 file changed, 43 insertions(+), 8 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 4c75df17af0..c1bd1a99b60 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -254,6 +254,9 @@ def split_text(file, main_title):
     # variable to keep track of the title level
     title_level = 0
 
+    # variable to allow for if statements to "continue" over multiple paragraphs
+    open_ifs = ""
+
     # list to keep track of most recent directories on each title level
     if LINUX_TUTORIAL not in file:
         curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)]
@@ -278,11 +281,12 @@ def split_text(file, main_title):
             # line is a title with a maximum depth of 4
             if title_level > 0:
                 if after_first_title:
+                    paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title])
                     paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir)
                 title = make_valid_title(line[title_level + 1:-1])
 
                 # create an entry for the file in the paragraphs text dictionary
-                paragraphs_text[title] = ""
+                paragraphs_text[title] = open_ifs
 
                 after_first_title = True
                 subtitle_order.append(title)
@@ -318,6 +322,38 @@ def write_metadata(main_title, subtitle, links, title_level, directory):
     return paragraph_metadata
 
 
+def close_ifs(text):
+    patterns = {
+        'if': r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})',
+        'endif': r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})',
+        'else': r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})'
+    }
+    if_count = len(re.findall(patterns['if'], text.replace("\n", "")))
+    endif_count = len(re.findall(patterns['endif'], text.replace("\n", "")))
+    if IF_MANGLED_PART not in text or if_count == endif_count:
+        return text, ""
+    else:
+
+        # Find all matches for each pattern
+        matches = []
+        for key, pattern in patterns.items():
+            for match in re.finditer(pattern, text):
+                matches.append(match)
+
+        # sort the matches according to their start index
+        matches.sort(key=lambda x: x.start())
+
+        # extract the strings from the matches
+        open_ifs = []
+        for match in matches:
+            open_ifs.append(match.group(0))
+
+        # Concatenate all matches into a single string
+        open_ifs = ''.join(open_ifs)
+
+        return text + r'{' + IF_MANGLED_PART + '% endif %' + IF_MANGLED_PART + '}', open_ifs
+
+
 def jinja_parser(filename, copy_location):
     """
     function that let's jinja do its thing to format the files except for the os-related if-statements
@@ -380,6 +416,7 @@ def mangle_os_ifs(line, is_os):
         if_match = re.search(r'if ', match.group(1))
         if_os_match = re.search(r'if OS ', match.group(1))
         endif_match = re.search(r'endif', match.group(1))
+        else_match = re.search(r'else', match.group(1))
 
         # mangle positions
         pos_first_mangle = constr_match.start() + start_index + added_length + 1
@@ -416,7 +453,7 @@ def mangle_os_ifs(line, is_os):
                 else:
                     is_os = NON_OS_IF
                     
-        else:
+        elif else_match:
             if is_os in (OS_IF, OS_IF_IN_OS_IF):
                 line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling
                 added_length += 2 * len(IF_MANGLED_PART)
@@ -655,7 +692,7 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order,
 
     # make the directory needed for the files that will be written
     filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title]["directory"])
-    os.makedirs(filepath)
+    os.makedirs(filepath, exist_ok=True)
 
     write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC)
 
@@ -672,7 +709,7 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or
 
         # define the filepath
         filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"])
-        os.makedirs(filepath)
+        os.makedirs(filepath, exist_ok=True)
 
         # write the files
         write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS)
@@ -761,6 +798,7 @@ def main():
     # for loops over all files
     for filenames in [filenames_generic, filenames_linux]:
         for filename in filenames.keys():
+            # print(filename)
             ################### define/reset loop specific variables ###################
 
             # variable that keeps track of whether file is part of the linux tutorial
@@ -830,6 +868,7 @@ def main():
 
             # for every section, either make the whole section generic, or create an os-specific file for each OS
             for i, subtitle in enumerate(subtitle_order):
+                # print(subtitle)
 
                 # generic
                 if IF_MANGLED_PART not in paragraphs_text[subtitle]:
@@ -872,10 +911,6 @@ def main():
             # for i, OS in enumerate(["", "Linux", "Windows", "macOS"]):
             #     write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title)
 
-            print(paragraphs_text)
-            print(paragraphs_metadata)
-            print(subtitle_order)
-
     # remove_directory_tree(COPIES)
     # remove_directory_tree(IF_MANGLED_FILES)
 

From 0e6f8b27f19c2256880960c9ee48b680045c5419 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 10:02:41 +0200
Subject: [PATCH 055/152] fixed an issue with jinja templates

---
 .../chatbot_parser.py                         | 60 ++++++++++++-------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index c1bd1a99b60..e72dc0643de 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -5,7 +5,7 @@
 import yaml
 from itertools import chain
 from pathlib import Path
-from jinja2 import FileSystemLoader, Environment, ChoiceLoader, Template
+from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template
 
 #################### define macro's ####################
 # customizable macros
@@ -381,7 +381,7 @@ def jinja_parser(filename, copy_location):
     mangle_ifs(copy_location, filename)
 
     # Use Jinja2 to replace the macros
-    template_loader = ChoiceLoader([FileSystemLoader(searchpath=IF_MANGLED_FILES), FileSystemLoader(searchpath=os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR))])
+    template_loader = ChoiceLoader([FileSystemLoader(searchpath=[IF_MANGLED_FILES, os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR)]), FunctionLoader(load_macros)])
     templateEnv = Environment(loader=template_loader)
     template = templateEnv.get_template(filename)
     rendered_content = template.render(combined_context)
@@ -391,6 +391,24 @@ def jinja_parser(filename, copy_location):
         output_file.write(rendered_content)
 
 
+def load_macros(name):
+    """
+    function used by the jinja FunctionLoader to retrieve templates from the macros folder since the normal FileSystemLoader can't locate them properly
+
+    :param name: name of the package
+    :return:
+    """
+
+    macros_location = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, "macros")
+
+    if "../macros/" in name:
+        package_name = name.split("../macros/")[1]
+        file_location = os.path.join(macros_location, package_name)
+
+        with open(file_location, 'r') as readfile:
+            return readfile.read()
+
+
 def mangle_os_ifs(line, is_os):
     """
     function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure.
@@ -773,27 +791,27 @@ def main():
 
     ################### define loop-invariant variables ###################
 
-    # # variable that keeps track of the source directories
-    # source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR),
-    #                       os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)]
-    #
-    # # list of all the filenames
-    # filenames_generic = {}
-    # filenames_linux = {}
-    # for source_directory in source_directories:
-    #     all_items = os.listdir(source_directory)
-    #     files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
-    #     for file in files:
-    #         if LINUX_TUTORIAL in source_directory:
-    #             filenames_linux[file] = os.path.join(source_directory, file)
-    #         else:
-    #             filenames_generic[file] = os.path.join(source_directory, file)
-
-    # Temporary variables to test with just one singular file
+    # variable that keeps track of the source directories
+    source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR),
+                          os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)]
+
+    # list of all the filenames
     filenames_generic = {}
     filenames_linux = {}
-    filenames_generic["getting_started.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/getting_started.md"
-    filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md"
+    for source_directory in source_directories:
+        all_items = os.listdir(source_directory)
+        files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
+        for file in files:
+            if LINUX_TUTORIAL in source_directory:
+                filenames_linux[file] = os.path.join(source_directory, file)
+            else:
+                filenames_generic[file] = os.path.join(source_directory, file)
+
+    # # Temporary variables to test with just one singular file
+    # filenames_generic = {}
+    # filenames_linux = {}
+    # filenames_generic["getting_started.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/getting_started.md"
+    # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md"
 
     # for loops over all files
     for filenames in [filenames_generic, filenames_linux]:

From cd778370a6cab55700d3e66745a049d9a644b3f9 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 10:31:10 +0200
Subject: [PATCH 056/152] added docstrings to new functions

---
 .../chatbot_parser.py                         | 79 +++++++++++++++++--
 1 file changed, 73 insertions(+), 6 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index e72dc0643de..938da0628c8 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -62,7 +62,7 @@
 
 
 ################### define functions ###################
-def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs, root_dirs, link_lists, is_linux_tutorial_, in_code_block_):
+def check_for_title_xl(curr_line, main_title, last_directory, last_title, curr_dirs, root_dirs, link_lists, is_linux_tutorial_, in_code_block_):
     """
     function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables
 
@@ -115,8 +115,15 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs
         return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs, link_lists
 
 
-def check_for_title_simple(line, in_code_block, curr_dirs):
+def check_for_title(line, in_code_block, curr_dirs):
+    """
+    function that checks for titles in the current line. Used by split_text to split the text among the subtitles
 
+    :param line: the current line to be checked for a title
+    :param in_code_block: boolean indicating whether the current line is part of a codeblock to be sure comments aren't counted as titles
+    :param curr_dirs: the current working directories for each level of subtitle, to be updated when a new title is found
+    :return title_length: The amount of hashtags in front of the title on the current line
+    """
     # detect titles
     match = re.match(r'^#+ ', line)
     if match and len(match.group(0)) <= 5 and not in_code_block:
@@ -234,6 +241,14 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
 
 
 def split_text(file, main_title):
+    """
+    Function that splits the text into smaller sections and makes them into two dictionaries containing text and metadata
+    :param file: the filepath of the file to be split
+    :param main_title: the main title of the file
+    :return paragraphs_text: dictionary containing the split sections of text
+    :return paragraphs_metadata: dictionary containing the metadata of each split section of text
+    :return subtitle_order: list containing all encountered subtitles in order of appearance
+    """
 
     # start of assuming we haven't encountered a title
     after_first_title = False
@@ -272,7 +287,7 @@ def split_text(file, main_title):
                 last_title_level = title_level
                 last_dir = curr_dirs[last_title_level]
 
-            title_level = check_for_title_simple(line, in_code_block, curr_dirs)
+            title_level = check_for_title(line, in_code_block, curr_dirs)
 
             # detect codeblocks to make sure titles aren't detected in them
             if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
@@ -309,6 +324,16 @@ def split_text(file, main_title):
 
 
 def write_metadata(main_title, subtitle, links, title_level, directory):
+    """
+    Function that writes metadata about a text section to a dictionary
+
+    :param main_title: The main title of the file containing the section
+    :param subtitle: the title of the section
+    :param links: a list of links contained within the section
+    :param title_level: the depth of the title of the section
+    :param directory: the directory where the section will eventually be written (can either be generic or os-specific)
+    :return paragraph_metadata: dictionary containing the metadata about the section
+    """
 
     paragraph_metadata = {'main_title': main_title, 'subtitle': subtitle, 'title_depth': title_level, 'directory': directory}
 
@@ -323,6 +348,17 @@ def write_metadata(main_title, subtitle, links, title_level, directory):
 
 
 def close_ifs(text):
+    """
+    Function to check whether all if-statements in a section are closed properly. If that is not the case, the function
+    closes all if-statements at the end of the section and returns a prefix for the next section containing all if-statements
+    of the section it is processing. This needs to be done because the start of the next section would also be contained within the
+    last unclosed if-statement of its previous section.
+
+    :param text: the text of the section it checks
+    :return text: the adapted text where all if-statements are closed
+    :return prefix: the prefix for the next section
+    """
+
     patterns = {
         'if': r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})',
         'endif': r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})',
@@ -707,6 +743,16 @@ def make_valid_title(title):
 
 
 def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number):
+    """
+    Function that writes text and metadata of a generic (non-os-specific) file
+
+    :param title: title of section
+    :param paragraphs_text: dictionary containing all paragraphs of text
+    :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text
+    :param title_order: list containing all subtitles in order
+    :param title_order_number: order number of the title of the section that is being written
+    :return:
+    """
 
     # make the directory needed for the files that will be written
     filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title]["directory"])
@@ -716,6 +762,16 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order,
 
 
 def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number):
+    """
+    Function that writes text and metadata of os-specific files
+
+    :param title: title of section
+    :param paragraphs_text: dictionary containing all paragraphs of text
+    :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text
+    :param title_order: list containing all subtitles in order
+    :param title_order_number: order number of the title of the section that is being written
+    :return:
+    """
     for i, OS in enumerate([LINUX, WINDOWS, MACOS]):
 
         # Unmangle if's to use jinja parser
@@ -734,6 +790,19 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or
 
 
 def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS):
+    """
+    Function to write files to a certain filepath
+
+    :param title: title of the section to be written
+    :param text: section of text to be written
+    :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text
+    :param title_order: list containing all subtitles in order
+    :param title_order_number: order number of the title of the section that is being written
+    :param filepath: filepath to write files to
+    :param OS: OS to be included in the metadata
+    :return:
+    """
+
     # write text file
     with open(os.path.join(filepath, paragraphs_metadata[title]["subtitle"] + ".txt"), 'w') as writefile:
         writefile.write(text)
@@ -816,7 +885,6 @@ def main():
     # for loops over all files
     for filenames in [filenames_generic, filenames_linux]:
         for filename in filenames.keys():
-            # print(filename)
             ################### define/reset loop specific variables ###################
 
             # variable that keeps track of whether file is part of the linux tutorial
@@ -896,12 +964,11 @@ def main():
                 else:
                     write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i)
 
-
             # # open the file and store line by line in the right file
             # with open(copy_file, 'r') as readfile:
             #
             #     for line in readfile:
-            #         title_level, title, directory, curr_dirs, link_lists = check_for_title(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block)
+            #         title_level, title, directory, curr_dirs, link_lists = check_for_title_xl(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block)
             #
             #         # detect codeblocks to make sure titles aren't detected in them
             #         if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):

From 98eb695790b30cfbde32c0b837a318ed11c88d59 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 11:24:35 +0200
Subject: [PATCH 057/152] only add necessary if-statements in front of
 non-if-complete sections

---
 .../chatbot_parser.py                         | 28 +++++++++++++++----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 938da0628c8..e15fce4f049 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -384,10 +384,28 @@ def close_ifs(text):
         for match in matches:
             open_ifs.append(match.group(0))
 
+        # only include the non-closed if-statements
+        changed = True
+        while changed:
+            changed = False
+            last_if = -1
+            last_else = -1
+            for i, if_part in enumerate(open_ifs):
+                if re.search(patterns['if'], if_part):
+                    last_if = i
+                elif re.search(patterns['else'], if_part):
+                    last_else = i
+                elif re.search(patterns['endif'], if_part):
+                    changed = True
+                    del open_ifs[i]
+                    if last_else > last_if:
+                        del open_ifs[last_else]
+                    del open_ifs[last_if]
+
         # Concatenate all matches into a single string
         open_ifs = ''.join(open_ifs)
 
-        return text + r'{' + IF_MANGLED_PART + '% endif %' + IF_MANGLED_PART + '}', open_ifs
+        return text + (r'{' + IF_MANGLED_PART + '% endif %' + IF_MANGLED_PART + '}')*(if_count - endif_count), open_ifs
 
 
 def jinja_parser(filename, copy_location):
@@ -451,7 +469,7 @@ def mangle_os_ifs(line, is_os):
     We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements.
 
     :param line: the current line to check for os-related if-statements
-    :param is_os: variable keep track of the current os-state of the if-statements. Can be NON_OS_IF, NON_OS_IF_IN_OS_IF, OS_IF or OS_IF_IN_OS_IF 
+    :param is_os: variable keep track of the current os-state of the if-statements. Can be NON_OS_IF, NON_OS_IF_IN_OS_IF, OS_IF or OS_IF_IN_OS_IF
         NON_OS_IF: not in an os-if
         NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if
         OS_IF: in an os-if
@@ -492,7 +510,7 @@ def mangle_os_ifs(line, is_os):
                     is_os = OS_IF
             elif is_os == NON_OS_IF_IN_OS_IF:
                 is_os = OS_IF
-                
+
         elif if_match:
             if if_os_match:
                 line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling
@@ -506,7 +524,7 @@ def mangle_os_ifs(line, is_os):
                     is_os = NON_OS_IF_IN_OS_IF
                 else:
                     is_os = NON_OS_IF
-                    
+
         elif else_match:
             if is_os in (OS_IF, OS_IF_IN_OS_IF):
                 line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling
@@ -879,7 +897,7 @@ def main():
     # # Temporary variables to test with just one singular file
     # filenames_generic = {}
     # filenames_linux = {}
-    # filenames_generic["getting_started.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/getting_started.md"
+    # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md"
     # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md"
 
     # for loops over all files

From 27457e371bdb494c06ac73a6cf4263a69d389631 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 12:12:45 +0200
Subject: [PATCH 058/152] fixed some more jinja problems

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index e15fce4f049..6bc9df169e3 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -401,6 +401,7 @@ def close_ifs(text):
                     if last_else > last_if:
                         del open_ifs[last_else]
                     del open_ifs[last_if]
+                    break
 
         # Concatenate all matches into a single string
         open_ifs = ''.join(open_ifs)
@@ -795,10 +796,16 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or
         # Unmangle if's to use jinja parser
         paragraphs_text[title] = re.sub(IF_MANGLED_PART, "", paragraphs_text[title])
 
+        # slightly alter if-statements to be able to use predefined macros
+        paragraphs_text[title] = re.sub(OS, '"' + OS + '"', paragraphs_text[title])
+
         # Use jinja to render a different version of the text for each OS
         template = Template(paragraphs_text[title])
         text = template.render(OS=OS)
 
+        # readjust text to correct overcorrections
+        text = re.sub('"' + OS + '"', OS, text)
+
         # define the filepath
         filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"])
         os.makedirs(filepath, exist_ok=True)

From bb722876b2734c5a9deba84128fae6713e499652 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 12:33:18 +0200
Subject: [PATCH 059/152] implemented extra test to make sure generic files
 don't accidentally get flagged as os-specific

---
 .../chatbot_parser.py                         | 28 ++++++++++++-------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 6bc9df169e3..a7d2fd5b5e2 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -791,7 +791,8 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or
     :param title_order_number: order number of the title of the section that is being written
     :return:
     """
-    for i, OS in enumerate([LINUX, WINDOWS, MACOS]):
+    text = {}
+    for OS in [LINUX, WINDOWS, MACOS]:
 
         # Unmangle if's to use jinja parser
         paragraphs_text[title] = re.sub(IF_MANGLED_PART, "", paragraphs_text[title])
@@ -801,17 +802,24 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or
 
         # Use jinja to render a different version of the text for each OS
         template = Template(paragraphs_text[title])
-        text = template.render(OS=OS)
+        text[OS] = template.render(OS=OS)
 
         # readjust text to correct overcorrections
-        text = re.sub('"' + OS + '"', OS, text)
-
-        # define the filepath
-        filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"])
-        os.makedirs(filepath, exist_ok=True)
-
-        # write the files
-        write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS)
+        text[OS] = re.sub('"' + OS + '"', OS, text[OS])
+
+    # check that not all versions are the same
+    unique_texts = set(text.values())
+    if len(unique_texts) > 1:
+        for OS in [LINUX, WINDOWS, MACOS]:
+            # define the filepath
+            filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"])
+            os.makedirs(filepath, exist_ok=True)
+
+            # write the files
+            write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS)
+    else:
+        paragraphs_text[title] = text[OS]
+        write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number)
 
 
 def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS):

From 67cb19e874e64d46564a8a5d34abc64c2a65e2a5 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 13:22:56 +0200
Subject: [PATCH 060/152] make sure empty os-specific files are not saved

---
 .../chatbot_parser.py                         | 23 +++++++++++--------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index a7d2fd5b5e2..2c78ad90df3 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -809,17 +809,22 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or
 
     # check that not all versions are the same
     unique_texts = set(text.values())
-    if len(unique_texts) > 1:
-        for OS in [LINUX, WINDOWS, MACOS]:
-            # define the filepath
-            filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"])
-            os.makedirs(filepath, exist_ok=True)
-
-            # write the files
-            write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS)
-    else:
+    if len(unique_texts) == 1:
         paragraphs_text[title] = text[OS]
         write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number)
+    else:
+        for OS in [LINUX, WINDOWS, MACOS]:
+            # check that file actually has some content
+            if len(text[OS]) > 0:
+                # define the filepath
+                filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"])
+                os.makedirs(filepath, exist_ok=True)
+
+                # write the files
+                write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS)
+            else:
+                # don't write empty files
+                pass
 
 
 def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS):

From cf9834a25aa1ab3e690cefa76705888605afae2b Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 13:27:05 +0200
Subject: [PATCH 061/152] clean up unused code

---
 .../chatbot_parser.py                         | 294 ------------------
 1 file changed, 294 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 2c78ad90df3..51b4efa00b2 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -62,58 +62,6 @@
 
 
 ################### define functions ###################
-def check_for_title_xl(curr_line, main_title, last_directory, last_title, curr_dirs, root_dirs, link_lists, is_linux_tutorial_, in_code_block_):
-    """
-    function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables
-
-    :param curr_line: the line to be checked for a title
-    :param main_title: the main title of the file, needed in the case where a file is finished
-    :param last_directory: the most recently encountered directory
-    :param last_title: the most recently encountered title
-    :param curr_dirs: the most recent directories at each title level
-    :param root_dirs: a list containing the root directories
-    :param link_lists: a list containing all four link_lists with the links that will be printed at the bottom of a file
-    :param is_linux_tutorial_: boolean to indicate whether the current file is part of the linux tutorial
-    :param in_code_block_: boolean to indicate whether the current line is part of a codeblock
-    :return: the depth of the title
-    :return: the title found in the line if any
-    :return: the new directory in which the next file will be written
-    :return link_lists: updated link_lists
-    """
-
-    # detect titles
-    match = re.match(r'^#+ ', curr_line)
-    if match and len(match.group(0)) <= MAX_TITLE_DEPTH + 1:
-        logic_output = len(match.group(0)) - 1
-    else:
-        logic_output = 0
-
-    # make necessary changes if a title has been detected
-    if logic_output == 0 or in_code_block_:
-        return 0, None, None, curr_dirs, link_lists
-    else:
-
-        # if a new title is detected, write the end of the previous file
-        if last_title is not None:
-            for i, OS in enumerate(["", "Linux", "Windows", "macOS"]):
-                write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial_, main_title, last_title)
-
-            # reset the link lists for each OS
-            for i in range(4):
-                link_lists[i] = []
-
-        # make a new directory corresponding with the new title
-        curr_dirs[logic_output] = os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')))
-
-        for i in range(4):
-            os.makedirs(os.path.join(root_dirs[i],  curr_dirs[logic_output]), exist_ok=True)
-
-        # update the higher order current directories
-        for i in range(logic_output + 1, MAX_TITLE_DEPTH + 1):
-            curr_dirs[i] = curr_dirs[logic_output]
-
-        return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs, link_lists
-
 
 def check_for_title(line, in_code_block, curr_dirs):
     """
@@ -554,191 +502,6 @@ def mangle_ifs(directory, filename):
                 write_file.write(new_line)
 
 
-def check_if_statements(curr_line, active_OS_if_states):
-    """
-    function that checks for if-statements
-
-    :param curr_line: the line to be checked for if-statements to build the directory structure
-    :param active_OS_if_states: dictionary keeping track of the active OS states according to the if-statements
-    :return: the next action to be done with the line:
-                DONE: An if-statement has been found at the start of the line, the active os list has been updated, processing of the current line is finished and a following line can be processed.
-                CHECK_EXTRA_MESSAGE: An if-statement has been found at the start of the line, the active os list has been updated, more text has been detected after the if-statement that also needs to be checked.
-                WRITE_TEXT: No if-statement has been found, write the current line to a file (can also be part of the current line)
-                WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE: An if statement has been found not at the start of the line. Firstly, write the text up until the if-statement to a file, then check the rest of the line.
-    :return: the extra message to be checked, if any
-    :return: the text to be written to the file, if any
-    """
-    # check whether the first part of the line contains information wrt if-statements
-    match = re.search(r'^\{' + IF_MANGLED_PART + '%(.*?)%' + IF_MANGLED_PART + '}(.*)', curr_line)
-
-    # check whether the line contains information wrt if-statements that is not in its first part
-    match_large = re.search(r'^(.*)(\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '})(.*)', curr_line)
-
-    if match:
-        content = match.group(1)
-
-        # new if-statement wrt OS with '=='
-        if re.search(r'if OS == ', content):
-            OS = content.split()[-1]
-
-            # set new active OS
-            active_OS_if_states[OS] = ACTIVE
-
-            # set other active ones on inactive
-            for other_OS in active_OS_if_states.keys():
-                if other_OS != OS and active_OS_if_states[other_OS] == ACTIVE:
-                    active_OS_if_states[other_OS] = INACTIVE
-
-        # new if-statement wrt OS with '!='
-        elif re.search(r'if OS != ', content):
-            OS = content.split()[-1]
-
-            # set new active OS
-            active_OS_if_states[OS] = INACTIVE
-
-            # set other inactive ones on active
-            for other_OS in active_OS_if_states.keys():
-                if other_OS != OS and active_OS_if_states[other_OS] == INACTIVE:
-                    active_OS_if_states[other_OS] = ACTIVE
-
-        # endif statement wrt OS
-        elif re.search(r'endif', content):
-            if str(1) in active_OS_if_states.values():
-                active_OS_if_states[
-                    list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = ACTIVE
-            else:
-                for key in active_OS_if_states.keys():
-                    active_OS_if_states[key] = INACTIVE
-
-        # else statement wrt OS
-        elif re.search(r'else', content):
-
-            i = 0
-            for i in range(3):
-                if str(i) not in active_OS_if_states.values():
-                    break
-
-            # set the previously active one on inactive until the next endif
-            key_list = list(active_OS_if_states.keys())
-            position = list(active_OS_if_states.values()).index(ACTIVE)
-            active_OS_if_states[key_list[position]] = str(i)
-
-            # set inactive ones on active
-            while INACTIVE in active_OS_if_states.values():
-                position = list(active_OS_if_states.values()).index(INACTIVE)
-                active_OS_if_states[key_list[position]] = ACTIVE
-
-        if len(match.group(2)) != 0:
-            extra_message = match.group(2).lstrip()
-            return CHECK_EXTRA_MESSAGE, extra_message, None
-
-        else:
-            return DONE, None, None
-
-    elif match_large:
-        return WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE, match_large.group(2), match_large.group(1)
-
-    else:
-        return WRITE_TEXT, None, curr_line
-
-
-def write_text_to_file(file_name, curr_line, link_lists, in_code_block):
-    """
-    function that writes a line to a file
-
-    :param file_name: target file to write the line to
-    :param curr_line: line to be written to the file
-    :param link_lists: list containing all the links that will be printed at the end of files
-    :param in_code_block: boolean indicating whether the current line is in a codeblock
-    :return link_lists: updated link_lists
-    """
-
-    if os.path.exists(file_name) or curr_line.strip():
-        if os.path.exists(file_name):
-            with open(file_name, "r") as read_file:
-                data = json.load(read_file)
-        else:
-            data = {}
-
-        os_list = [GENERIC_DIR, LINUX, WINDOWS, MACOS]
-        for i, os_ in enumerate(os_list):
-            if os_ in file_name:
-                curr_line, link_lists[i] = replace_markdown_markers(curr_line, link_lists[i], in_code_block, "placeholder")
-
-        if CONTENT in data:
-            data[CONTENT] += curr_line
-        else:
-            data[CONTENT] = curr_line
-
-        with open(file_name, "w") as write_file:
-            json.dump(data, write_file, indent=4)
-
-    return link_lists
-
-
-def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, last_title, root_dirs, link_lists, in_code_block):
-    """
-    function that decides what file to write text to
-
-    :param curr_line: line to be written to a file
-    :param active_OS_if_states: dictionary keeping track of which OSes are active according to the if-statements
-    :param last_directory: most recently made directory
-    :param last_title: the most recently encountered title
-    :param root_dirs: a list with all root directories
-    :param link_lists: list of links that need to be written at the end of the files
-    :param in_code_block: boolean indicating whether the current line is in a code block
-    :return link_lists: an updated link_lists
-    """
-    # check that the line is part of the website for gent
-    if active_OS_if_states[LINUX] == INACTIVE and active_OS_if_states[WINDOWS] == INACTIVE and active_OS_if_states[MACOS] == INACTIVE:
-        link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block)
-    else:
-        for i, os_ in enumerate([LINUX, WINDOWS, MACOS]):
-            if active_OS_if_states[os_] == ACTIVE:
-                link_lists = write_text_to_file(os.path.join(root_dirs[i + 1], last_directory, last_title + ".json"),
-                                                curr_line, link_lists, in_code_block)
-
-    return link_lists
-
-
-def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_, main_title, last_title):
-    """
-    function that adds the links that should be at the end of a file
-
-    :param file_location: the location of the file
-    :param OS: the OS of the file
-    :param linklist: the links that should be at the end of the file
-    :param is_linux_tutorial_: boolean indicating whether the file is part of the linux tutorial
-    :param main_title: the main title of the file, to be used in the reference link
-    :param last_title: the most recently encountered title
-    :return:
-    """
-
-    if os.path.exists(file_location):
-
-        if len(OS) > 0:
-            OS = OS + "/"
-
-        with open(file_location, "r") as read_file:
-            data = json.load(read_file)
-
-        # add the links from within the document
-        data[LINKS] = {}
-        for i, link in enumerate(linklist):
-            data[LINKS][str(i + 1)] = str(link)
-
-        if is_linux_tutorial_:
-            linux_part = LINUX_TUTORIAL + "/"
-        else:
-            linux_part = ""
-
-        # add the reference link
-        data[REFERENCE_LINK] = (DOCS_URL + "/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-'))
-
-        with open(file_location, 'w') as write_file:
-            json.dump(data, write_file, indent=4)
-
-
 def make_valid_title(title):
     """
     function that makes sure all titles can be used as valid filenames
@@ -946,7 +709,6 @@ def main():
                 root_dir_os_specific_linux = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, LINUX)
                 root_dir_os_specific_windows = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS)
                 root_dir_os_specific_macos = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, MACOS)
-            root_dirs = [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos]
 
             # variable for the main title (needed for reference links)
             main_title = filename[:-3]
@@ -954,30 +716,6 @@ def main():
             # variable that keeps track of the directories that are used to write in at different levels
             curr_dirs = [filename[:-3] for _ in range(5)]
 
-            # variable that keeps track of the latest non-zero level title and corresponding directory
-            last_title = None
-            last_directory = None
-
-            # list to keep track of links in the text
-            links_generic = []
-            links_linux = []
-            links_windows = []
-            links_macos = []
-            link_lists = [links_generic, links_linux, links_windows, links_macos]
-
-            # dictionaries to keep track of current OS
-            active_OS_if_states = {LINUX: INACTIVE, WINDOWS: INACTIVE, MACOS: INACTIVE}
-
-            # dictionaries to save the paragraphs and metadata before it is written to files
-            paragraphs_text = {}
-            paragraphs_metadata = {}
-
-            # variable that shows whether the first title has been reached yet
-            after_first_title = False
-
-            # variable that is used to be sure that we are detecting titles and not comments from codeblocks
-            in_code_block = False
-
             ################### actually parse the md file ###################
 
             # create directories for the source markdown file
@@ -1002,38 +740,6 @@ def main():
                 else:
                     write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i)
 
-            # # open the file and store line by line in the right file
-            # with open(copy_file, 'r') as readfile:
-            #
-            #     for line in readfile:
-            #         title_level, title, directory, curr_dirs, link_lists = check_for_title_xl(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block)
-            #
-            #         # detect codeblocks to make sure titles aren't detected in them
-            #         if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
-            #             in_code_block = not in_code_block
-            #
-            #         # line is a title with a maximum depth of 4
-            #         if title_level > 0:
-            #             last_title = title
-            #             last_directory = directory
-            #             after_first_title = True
-            #
-            #         # line is not a title
-            #         elif after_first_title:
-            #             # check for if-statements and write the appropriate lines in the right files
-            #             next_action = check_if_statements(line, active_OS_if_states)
-            #             while next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE or next_action[0] == CHECK_EXTRA_MESSAGE:
-            #                 if next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE:
-            #                     link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block)
-            #                 next_action = check_if_statements(next_action[1], active_OS_if_states)
-            #
-            #             if next_action[0] == WRITE_TEXT:
-            #                 link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block)
-            #
-            # # write end of file for the last file
-            # for i, OS in enumerate(["", "Linux", "Windows", "macOS"]):
-            #     write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title)
-
     # remove_directory_tree(COPIES)
     # remove_directory_tree(IF_MANGLED_FILES)
 

From da32459088fd4bcb0b665df5ab3b24464a585925 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 13:43:04 +0200
Subject: [PATCH 062/152] introduce more macros

---
 .../chatbot_parser.py                         | 70 +++++++++++--------
 1 file changed, 42 insertions(+), 28 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 51b4efa00b2..91165d97429 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -25,6 +25,7 @@
 EXTRA_DIR = "extra"
 GENERIC_DIR = "generic"
 OS_SPECIFIC_DIR = "os_specific"
+MACROS = "macros"
 
 # OSes
 LINUX = "linux"
@@ -55,11 +56,23 @@
 CHECK_EXTRA_MESSAGE = "check_extra_message"
 WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE = "write_text_and_check_extra_message"
 
-# JSON attributes
-CONTENT = "content"
+# Metadata attributes
+MAIN_TITLE = "main_title"
+SUBTITLE = "subtitle"
+TITLE_DEPTH = "title_depth"
+DIRECTORY = "directory"
 LINKS = "links"
+PARENT_TITLE = "parent_title"
+PREVIOUS_TITLE = "previous_title"
+NEXT_TITLE = "next_title"
+METADATA_OS = "OS"
 REFERENCE_LINK = "reference_link"
 
+# if-structure components
+IF = "if"
+ELSE = "else"
+ENDIF = "endif"
+
 
 ################### define functions ###################
 
@@ -283,14 +296,14 @@ def write_metadata(main_title, subtitle, links, title_level, directory):
     :return paragraph_metadata: dictionary containing the metadata about the section
     """
 
-    paragraph_metadata = {'main_title': main_title, 'subtitle': subtitle, 'title_depth': title_level, 'directory': directory}
+    paragraph_metadata = {MAIN_TITLE: main_title, SUBTITLE: subtitle, TITLE_DEPTH: title_level, DIRECTORY: directory}
 
     if len(links) > 0:
-        paragraph_metadata['links'] = {}
+        paragraph_metadata[LINKS] = {}
         for i, link in enumerate(links):
-            paragraph_metadata['links'][str(i)] = link
+            paragraph_metadata[LINKS][str(i)] = link
 
-    paragraph_metadata['parent_title'] = Path(directory).parent.name
+    paragraph_metadata[PARENT_TITLE] = Path(directory).parent.name
 
     return paragraph_metadata
 
@@ -308,12 +321,12 @@ def close_ifs(text):
     """
 
     patterns = {
-        'if': r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})',
-        'endif': r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})',
-        'else': r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})'
+        IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})',
+        ENDIF: r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})',
+        ELSE: r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})'
     }
-    if_count = len(re.findall(patterns['if'], text.replace("\n", "")))
-    endif_count = len(re.findall(patterns['endif'], text.replace("\n", "")))
+    if_count = len(re.findall(patterns[IF], text.replace("\n", "")))
+    endif_count = len(re.findall(patterns[ENDIF], text.replace("\n", "")))
     if IF_MANGLED_PART not in text or if_count == endif_count:
         return text, ""
     else:
@@ -339,11 +352,11 @@ def close_ifs(text):
             last_if = -1
             last_else = -1
             for i, if_part in enumerate(open_ifs):
-                if re.search(patterns['if'], if_part):
+                if re.search(patterns[IF], if_part):
                     last_if = i
-                elif re.search(patterns['else'], if_part):
+                elif re.search(patterns[ELSE], if_part):
                     last_else = i
-                elif re.search(patterns['endif'], if_part):
+                elif re.search(patterns[ENDIF], if_part):
                     changed = True
                     del open_ifs[i]
                     if last_else > last_if:
@@ -402,10 +415,10 @@ def load_macros(name):
     :return:
     """
 
-    macros_location = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, "macros")
+    macros_location = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, MACROS)
 
-    if "../macros/" in name:
-        package_name = name.split("../macros/")[1]
+    if "../" + MACROS + "/" in name:
+        package_name = name.split("../" + MACROS + "/")[1]
         file_location = os.path.join(macros_location, package_name)
 
         with open(file_location, 'r') as readfile:
@@ -537,7 +550,7 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order,
     """
 
     # make the directory needed for the files that will be written
-    filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title]["directory"])
+    filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY])
     os.makedirs(filepath, exist_ok=True)
 
     write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC)
@@ -580,7 +593,7 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or
             # check that file actually has some content
             if len(text[OS]) > 0:
                 # define the filepath
-                filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"])
+                filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title][DIRECTORY])
                 os.makedirs(filepath, exist_ok=True)
 
                 # write the files
@@ -605,25 +618,25 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
     """
 
     # write text file
-    with open(os.path.join(filepath, paragraphs_metadata[title]["subtitle"] + ".txt"), 'w') as writefile:
+    with open(os.path.join(filepath, paragraphs_metadata[title][SUBTITLE] + ".txt"), 'w') as writefile:
         writefile.write(text)
 
     # write metadata
     metadata = paragraphs_metadata[title]
 
     if title_order_number != 0:
-        metadata["previous_title"] = title_order[title_order_number - 1]
+        metadata[PREVIOUS_TITLE] = title_order[title_order_number - 1]
     else:
-        metadata["previous_title"] = None
+        metadata[PREVIOUS_TITLE] = None
 
     if title_order_number != len(title_order) - 1:
-        metadata["next_title"] = title_order[title_order_number + 1]
+        metadata[NEXT_TITLE] = title_order[title_order_number + 1]
     else:
-        metadata["next_title"] = None
+        metadata[NEXT_TITLE] = None
 
-    metadata["OS"] = OS
+    metadata[METADATA_OS] = OS
 
-    if bool(LINUX_TUTORIAL in paragraphs_metadata[title]["directory"]):
+    if bool(LINUX_TUTORIAL in paragraphs_metadata[title][DIRECTORY]):
         linux_part = LINUX_TUTORIAL + "/"
     else:
         linux_part = ""
@@ -631,11 +644,12 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
         os_part = ""
     else:
         os_part = OS + "/"
-    metadata["reference_link"] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title]["main_title"] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-')
+    metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-')
 
-    with open(os.path.join(filepath, paragraphs_metadata[title]["subtitle"] + "_metadata.json"), 'w') as writefile:
+    with open(os.path.join(filepath, paragraphs_metadata[title][SUBTITLE] + "_metadata.json"), 'w') as writefile:
         json.dump(metadata, writefile, indent=4)
 
+
 def main():
     """
     main function

From 093200b232c1c6ed5c10530ec6a09717b2aaf263 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 13:48:54 +0200
Subject: [PATCH 063/152] reintroduce logic to remove unnecessary directories

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 91165d97429..8e3141c4b52 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -656,9 +656,9 @@ def main():
     :return:
     """
     # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason
-    shutil.rmtree(PARSED_MDS)
-    shutil.rmtree(COPIES)
-    shutil.rmtree(IF_MANGLED_FILES)
+    shutil.rmtree(PARSED_MDS, ignore_errors=True)
+    shutil.rmtree(COPIES, ignore_errors=True)
+    shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True)
 
     # make the necessary directories
     if not os.path.exists(COPIES):
@@ -754,8 +754,8 @@ def main():
                 else:
                     write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i)
 
-    # remove_directory_tree(COPIES)
-    # remove_directory_tree(IF_MANGLED_FILES)
+    shutil.rmtree(COPIES, ignore_errors=True)
+    shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True)
 
 
 ################### run the script ###################

From 5d0ffe951e515ee3fb890b82a2431332e92b3d4a Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 14:34:45 +0200
Subject: [PATCH 064/152] added functionality to include links or leave them
 out

---
 .../chatbot_parser.py                         | 75 ++++++++++++++-----
 .../HPC_chatbot_preprocessor/requirements.txt |  1 +
 2 files changed, 56 insertions(+), 20 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 8e3141c4b52..33ddefbdbbf 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -1,3 +1,4 @@
+import copy
 import json
 import os
 import re
@@ -73,6 +74,10 @@
 ELSE = "else"
 ENDIF = "endif"
 
+# link indicators
+LINK_BEFORE = r'§link§link§'
+LINK_AFTER = r'§link§link§'
+
 
 ################### define functions ###################
 
@@ -120,14 +125,14 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
     matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line)
     if matches:
         for match in matches:
-            curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + str(len(linklist) + 1) + "]")
+            curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_BEFORE + str(len(linklist)) + LINK_AFTER)
             if ".md" not in match[1]:
                 if "#" not in match[1]:
                     linklist.append(match[1])
                 else:
-                    linklist.append(DOCS_URL + main_title + "/" + match[1])
+                    linklist.append(DOCS_URL + "/" + main_title + "/" + match[1])
             else:
-                linklist.append(DOCS_URL + match[1].replace(".md", "/").replace("index", "").rstrip("/"))
+                linklist.append(DOCS_URL + "/" + match[1].replace(".md", "/").replace("index", "").rstrip("/"))
 
     # codeblock (with ``` -> always stands on a separate line, so line can be dropped)
     if '```' in curr_line:
@@ -617,13 +622,17 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
     :return:
     """
 
+    metadata = copy.deepcopy(paragraphs_metadata[title])
+
     # write text file
     with open(os.path.join(filepath, paragraphs_metadata[title][SUBTITLE] + ".txt"), 'w') as writefile:
-        writefile.write(text)
+        if LINKS in paragraphs_metadata[title].keys():
+            adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS])
+            writefile.write(adapted_text)
+        else:
+            writefile.write(text)
 
     # write metadata
-    metadata = paragraphs_metadata[title]
-
     if title_order_number != 0:
         metadata[PREVIOUS_TITLE] = title_order[title_order_number - 1]
     else:
@@ -650,6 +659,32 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
         json.dump(metadata, writefile, indent=4)
 
 
+def insert_links(text, links):
+    """
+    Function that inserts links in the plaintext or takes out the references to the links depending on the value of INCLUDE_LINKS_IN_PLAINTEXT
+
+    :param text: The plaintext that needs to be adapted
+    :param links: The links that might need to be inserted
+    :return text: The adapted plaintext
+    :return links: The links that were actually present in the text
+    """
+
+    present_links = []
+    new_links = {}
+    for link_number in re.finditer(LINK_BEFORE + r'([0-9]*?)' + LINK_AFTER, text):
+        present_links.append(link_number.group(1))
+        if INCLUDE_LINKS_IN_PLAINTEXT:
+            text = re.sub(LINK_BEFORE + link_number.group(1) + LINK_AFTER, " " + links[link_number.group(1)] + " ", text)
+        else:
+            text = re.sub(LINK_BEFORE + link_number.group(1) + LINK_AFTER, "", text)
+
+    for link_number in links.keys():
+        if link_number in present_links:
+            new_links[len(new_links.keys())] = links[link_number]
+
+    return text, new_links
+
+
 def main():
     """
     main function
@@ -679,22 +714,22 @@ def main():
     source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR),
                           os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)]
 
-    # list of all the filenames
-    filenames_generic = {}
-    filenames_linux = {}
-    for source_directory in source_directories:
-        all_items = os.listdir(source_directory)
-        files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
-        for file in files:
-            if LINUX_TUTORIAL in source_directory:
-                filenames_linux[file] = os.path.join(source_directory, file)
-            else:
-                filenames_generic[file] = os.path.join(source_directory, file)
-
-    # # Temporary variables to test with just one singular file
+    # # list of all the filenames
     # filenames_generic = {}
     # filenames_linux = {}
-    # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md"
+    # for source_directory in source_directories:
+    #     all_items = os.listdir(source_directory)
+    #     files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
+    #     for file in files:
+    #         if LINUX_TUTORIAL in source_directory:
+    #             filenames_linux[file] = os.path.join(source_directory, file)
+    #         else:
+    #             filenames_generic[file] = os.path.join(source_directory, file)
+
+    # Temporary variables to test with just one singular file
+    filenames_generic = {}
+    filenames_linux = {}
+    filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md"
     # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md"
 
     # for loops over all files
diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt
index 907f08fda77..3b118535f3b 100644
--- a/scripts/HPC_chatbot_preprocessor/requirements.txt
+++ b/scripts/HPC_chatbot_preprocessor/requirements.txt
@@ -1,3 +1,4 @@
+copy
 os
 re
 shutil

From a3e34a97d0fec915d199b4be0b0a9a62f4b4be4f Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 14:37:16 +0200
Subject: [PATCH 065/152] added functionality to include links or leave them
 out

---
 scripts/HPC_chatbot_preprocessor/requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt
index 3b118535f3b..907f08fda77 100644
--- a/scripts/HPC_chatbot_preprocessor/requirements.txt
+++ b/scripts/HPC_chatbot_preprocessor/requirements.txt
@@ -1,4 +1,3 @@
-copy
 os
 re
 shutil

From 7c6154b47023062d4b7b6ff5932b60ccb63d56c3 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 15:36:39 +0200
Subject: [PATCH 066/152] adapt filenames to allow for splitting on something
 other than subtitles

---
 .../chatbot_parser.py                         | 60 +++++++++++--------
 1 file changed, 34 insertions(+), 26 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 33ddefbdbbf..5c31199d731 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -12,7 +12,7 @@
 # customizable macros
 MIN_PARAGRAPH_LENGTH = 128
 MAX_TITLE_DEPTH = 4
-INCLUDE_LINKS_IN_PLAINTEXT = True
+INCLUDE_LINKS_IN_PLAINTEXT = False
 
 # directories
 PARSED_MDS = "parsed_mds"
@@ -59,7 +59,7 @@
 
 # Metadata attributes
 MAIN_TITLE = "main_title"
-SUBTITLE = "subtitle"
+SUBTITLE = "subtitle (incorrect in some cases, working on a fix)"
 TITLE_DEPTH = "title_depth"
 DIRECTORY = "directory"
 LINKS = "links"
@@ -542,7 +542,7 @@ def make_valid_title(title):
     return valid_filename
 
 
-def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number):
+def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers):
     """
     Function that writes text and metadata of a generic (non-os-specific) file
 
@@ -551,6 +551,7 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order,
     :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text
     :param title_order: list containing all subtitles in order
     :param title_order_number: order number of the title of the section that is being written
+    :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS
     :return:
     """
 
@@ -558,10 +559,10 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order,
     filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY])
     os.makedirs(filepath, exist_ok=True)
 
-    write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC)
+    write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC, paragraph_numbers=paragraph_numbers)
 
 
-def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number):
+def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers):
     """
     Function that writes text and metadata of os-specific files
 
@@ -570,6 +571,7 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or
     :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text
     :param title_order: list containing all subtitles in order
     :param title_order_number: order number of the title of the section that is being written
+    :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS
     :return:
     """
     text = {}
@@ -592,7 +594,7 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or
     unique_texts = set(text.values())
     if len(unique_texts) == 1:
         paragraphs_text[title] = text[OS]
-        write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number)
+        write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers)
     else:
         for OS in [LINUX, WINDOWS, MACOS]:
             # check that file actually has some content
@@ -602,13 +604,13 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or
                 os.makedirs(filepath, exist_ok=True)
 
                 # write the files
-                write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS)
+                write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS, paragraph_numbers=paragraph_numbers)
             else:
                 # don't write empty files
                 pass
 
 
-def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS):
+def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, paragraph_numbers):
     """
     Function to write files to a certain filepath
 
@@ -619,13 +621,14 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
     :param title_order_number: order number of the title of the section that is being written
     :param filepath: filepath to write files to
     :param OS: OS to be included in the metadata
+    :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS
     :return:
     """
 
     metadata = copy.deepcopy(paragraphs_metadata[title])
 
     # write text file
-    with open(os.path.join(filepath, paragraphs_metadata[title][SUBTITLE] + ".txt"), 'w') as writefile:
+    with open(os.path.join(filepath, paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS]) + ".txt"), 'w') as writefile:
         if LINKS in paragraphs_metadata[title].keys():
             adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS])
             writefile.write(adapted_text)
@@ -655,9 +658,11 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
         os_part = OS + "/"
     metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-')
 
-    with open(os.path.join(filepath, paragraphs_metadata[title][SUBTITLE] + "_metadata.json"), 'w') as writefile:
+    with open(os.path.join(filepath, paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS]) + "_metadata.json"), 'w') as writefile:
         json.dump(metadata, writefile, indent=4)
 
+    paragraph_numbers[OS] += 1
+
 
 def insert_links(text, links):
     """
@@ -714,22 +719,22 @@ def main():
     source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR),
                           os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)]
 
-    # # list of all the filenames
-    # filenames_generic = {}
-    # filenames_linux = {}
-    # for source_directory in source_directories:
-    #     all_items = os.listdir(source_directory)
-    #     files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
-    #     for file in files:
-    #         if LINUX_TUTORIAL in source_directory:
-    #             filenames_linux[file] = os.path.join(source_directory, file)
-    #         else:
-    #             filenames_generic[file] = os.path.join(source_directory, file)
-
-    # Temporary variables to test with just one singular file
+    # list of all the filenames
     filenames_generic = {}
     filenames_linux = {}
-    filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md"
+    for source_directory in source_directories:
+        all_items = os.listdir(source_directory)
+        files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
+        for file in files:
+            if LINUX_TUTORIAL in source_directory:
+                filenames_linux[file] = os.path.join(source_directory, file)
+            else:
+                filenames_generic[file] = os.path.join(source_directory, file)
+
+    # # Temporary variables to test with just one singular file
+    # filenames_generic = {}
+    # filenames_linux = {}
+    # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md"
     # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md"
 
     # for loops over all files
@@ -765,6 +770,9 @@ def main():
             # variable that keeps track of the directories that are used to write in at different levels
             curr_dirs = [filename[:-3] for _ in range(5)]
 
+            # dictionary that keeps track of the paragraph numbers
+            paragraph_numbers = {GENERIC: 1, LINUX: 1, WINDOWS: 1, MACOS: 1}
+
             ################### actually parse the md file ###################
 
             # create directories for the source markdown file
@@ -783,11 +791,11 @@ def main():
 
                 # generic
                 if IF_MANGLED_PART not in paragraphs_text[subtitle]:
-                    write_generic_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i)
+                    write_generic_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers)
 
                 # os-specific
                 else:
-                    write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i)
+                    write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers)
 
     shutil.rmtree(COPIES, ignore_errors=True)
     shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True)

From 8d5b50dc727e284917eb1540d91f692f56ff8a4a Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 16:19:57 +0200
Subject: [PATCH 067/152] making some changes to prepare to add paragraph level
 splitting tomorrow

---
 .../chatbot_parser.py                         | 25 ++++++++++---------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 5c31199d731..742522e6e70 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -13,6 +13,7 @@
 MIN_PARAGRAPH_LENGTH = 128
 MAX_TITLE_DEPTH = 4
 INCLUDE_LINKS_IN_PLAINTEXT = False
+DEEP_DIRECTORIES = True
 
 # directories
 PARSED_MDS = "parsed_mds"
@@ -64,8 +65,8 @@
 DIRECTORY = "directory"
 LINKS = "links"
 PARENT_TITLE = "parent_title"
-PREVIOUS_TITLE = "previous_title"
-NEXT_TITLE = "next_title"
+PREVIOUS_SUBTITLE = "previous_title"
+NEXT_SUBTITLE = "next_title"
 METADATA_OS = "OS"
 REFERENCE_LINK = "reference_link"
 
@@ -75,8 +76,7 @@
 ENDIF = "endif"
 
 # link indicators
-LINK_BEFORE = r'§link§link§'
-LINK_AFTER = r'§link§link§'
+LINK_MARKER = r'§link§link§'
 
 
 ################### define functions ###################
@@ -94,11 +94,12 @@ def check_for_title(line, in_code_block, curr_dirs):
     match = re.match(r'^#+ ', line)
     if match and len(match.group(0)) <= 5 and not in_code_block:
         title_length = len(match.group(0)) - 1
-        curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 1:-1].replace(' ', '-')))
+        if DEEP_DIRECTORIES:
+            curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 1:-1].replace(' ', '-')))
 
-        # update the higher order current directories
-        for i in range(title_length + 1, MAX_TITLE_DEPTH + 1):
-            curr_dirs[i] = curr_dirs[title_length]
+            # update the higher order current directories
+            for i in range(title_length + 1, MAX_TITLE_DEPTH + 1):
+                curr_dirs[i] = curr_dirs[title_length]
 
         return title_length
     else:
@@ -125,7 +126,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
     matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line)
     if matches:
         for match in matches:
-            curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_BEFORE + str(len(linklist)) + LINK_AFTER)
+            curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_MARKER + str(len(linklist)) + LINK_MARKER)
             if ".md" not in match[1]:
                 if "#" not in match[1]:
                     linklist.append(match[1])
@@ -676,12 +677,12 @@ def insert_links(text, links):
 
     present_links = []
     new_links = {}
-    for link_number in re.finditer(LINK_BEFORE + r'([0-9]*?)' + LINK_AFTER, text):
+    for link_number in re.finditer(LINK_MARKER + r'([0-9]*?)' + LINK_MARKER, text):
         present_links.append(link_number.group(1))
         if INCLUDE_LINKS_IN_PLAINTEXT:
-            text = re.sub(LINK_BEFORE + link_number.group(1) + LINK_AFTER, " " + links[link_number.group(1)] + " ", text)
+            text = re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, " " + links[link_number.group(1)] + " ", text)
         else:
-            text = re.sub(LINK_BEFORE + link_number.group(1) + LINK_AFTER, "", text)
+            text = re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, "", text)
 
     for link_number in links.keys():
         if link_number in present_links:

From 0c10376f1f3d5ea56f3ddc32fa580ff436413a73 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 16:20:41 +0200
Subject: [PATCH 068/152] making some changes to prepare to add paragraph level
 splitting tomorrow

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 742522e6e70..1c13edc93e3 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -13,6 +13,8 @@
 MIN_PARAGRAPH_LENGTH = 128
 MAX_TITLE_DEPTH = 4
 INCLUDE_LINKS_IN_PLAINTEXT = False
+SPLIT_ON_TITLES = True
+SPLIT_ON_PARAGRAPHS = False
 DEEP_DIRECTORIES = True
 
 # directories

From f8ee8607545a5638de94787bb00046226e19cce0 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 16:30:55 +0200
Subject: [PATCH 069/152] making some changes to prepare to add paragraph level
 splitting tomorrow

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 1c13edc93e3..561e112d28e 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -640,14 +640,14 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
 
     # write metadata
     if title_order_number != 0:
-        metadata[PREVIOUS_TITLE] = title_order[title_order_number - 1]
+        metadata[PREVIOUS_SUBTITLE] = title_order[title_order_number - 1]
     else:
-        metadata[PREVIOUS_TITLE] = None
+        metadata[PREVIOUS_SUBTITLE] = None
 
     if title_order_number != len(title_order) - 1:
-        metadata[NEXT_TITLE] = title_order[title_order_number + 1]
+        metadata[NEXT_SUBTITLE] = title_order[title_order_number + 1]
     else:
-        metadata[NEXT_TITLE] = None
+        metadata[NEXT_SUBTITLE] = None
 
     metadata[METADATA_OS] = OS
 

From 6533733a4d462db37544251af7a9d33697ad63bb Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 17:04:10 +0200
Subject: [PATCH 070/152] adapted the parsing script to allow for testing in a
 semi-efficient way

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 561e112d28e..43cb93c5c08 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -531,7 +531,7 @@ def make_valid_title(title):
     :return valid_filename: the adapted title that can be used as filename
     """
     # Define a regex pattern for invalid characters on both Windows and Linux
-    invalid_chars = r'[<>:"/\\|?*\0()]'
+    invalid_chars = r'[<>:"/\\|?*\0]'
 
     # get rid of extra information between {} brackets
     title = re.sub(r'\{.*?}', '', title)
@@ -805,6 +805,7 @@ def main():
 
 
 ################### run the script ###################
-print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
-main()
-print("Parsing finished successfully")
+if __name__ == '__main__':
+    print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
+    main()
+    print("Parsing finished successfully")

From 2e7a00f1b724e77249caef30c62e8aa6c6c9f628 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 17:04:35 +0200
Subject: [PATCH 071/152] added test for make_valid_title

---
 .../tests/test_make_valid_title.py            | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
new file mode 100644
index 00000000000..f3c423ed9c3
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
@@ -0,0 +1,20 @@
+import pytest
+import shutil
+from chatbot_parser import make_valid_title
+
+
+@pytest.mark.parametrize("input_string,expected", [
+    ("", ""),
+    ("A-good-filename-with-dashes", "A-good-filename-with-dashes"),
+    (" A very good filename beginning and ending in a space ", "A very good filename beginning and ending in a space"),
+    ("-A-very-good-filename-beginning-and-ending-in-a-dash-", "A-very-good-filename-beginning-and-ending-in-a-dash"),
+    ("A filename containing bad characters <>:\"/\\|?*\0", "A filename containing bad characters"),
+    ("A filename ending with {some jinja garbage}", "A filename ending with")
+])
+def test_make_valid_title(input_string, expected):
+    assert make_valid_title(input_string) == expected
+
+
+shutil.rmtree("parsed_mds", ignore_errors=True)
+shutil.rmtree("copies", ignore_errors=True)
+shutil.rmtree("if_mangled_files", ignore_errors=True)

From f5e0579fb6a83f1a8e643fc5b1b77309080bf0e3 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 21 Aug 2024 17:07:49 +0200
Subject: [PATCH 072/152] removed useless lines from testscript

---
 .../HPC_chatbot_preprocessor/tests/test_make_valid_title.py  | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
index f3c423ed9c3..aebecddd0f3 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
@@ -13,8 +13,3 @@
 ])
 def test_make_valid_title(input_string, expected):
     assert make_valid_title(input_string) == expected
-
-
-shutil.rmtree("parsed_mds", ignore_errors=True)
-shutil.rmtree("copies", ignore_errors=True)
-shutil.rmtree("if_mangled_files", ignore_errors=True)

From 6757b4f5eba4a105a1b5b94c6a9c720c25e74f2a Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 22 Aug 2024 11:08:12 +0200
Subject: [PATCH 073/152] First attempt at splitting in paragraphs (need for
 other fixes for title-based-split first)

---
 .../chatbot_parser.py                         | 45 +++++++++++++++----
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 43cb93c5c08..a148e7b2bbd 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -10,12 +10,12 @@
 
 #################### define macro's ####################
 # customizable macros
-MIN_PARAGRAPH_LENGTH = 128
+MIN_PARAGRAPH_LENGTH = 160
 MAX_TITLE_DEPTH = 4
 INCLUDE_LINKS_IN_PLAINTEXT = False
 SPLIT_ON_TITLES = True
-SPLIT_ON_PARAGRAPHS = False
-DEEP_DIRECTORIES = True
+SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES
+DEEP_DIRECTORIES = True and SPLIT_ON_TITLES  # Should always be False if SPLIT_ON_TITLES is False
 
 # directories
 PARSED_MDS = "parsed_mds"
@@ -219,8 +219,10 @@ def split_text(file, main_title):
     :return subtitle_order: list containing all encountered subtitles in order of appearance
     """
 
-    # start of assuming we haven't encountered a title
+    # start of assuming we haven't encountered a title and the first paragraph hasn't appeared yet
     after_first_title = False
+    after_first_paragraph = False
+    paragraph_number = 1
 
     # start of assuming we are not in a code_block
     in_code_block = False
@@ -241,6 +243,12 @@ def split_text(file, main_title):
     # variable to allow for if statements to "continue" over multiple paragraphs
     open_ifs = ""
 
+    # initialise the first paragraph if SPLIT_ON_PARAGRAPH is True
+    if SPLIT_ON_PARAGRAPHS:
+        title = main_title + "_paragraph_" + str(paragraph_number)
+        paragraphs_text[title] = ""
+        subtitle_order.append(title)
+
     # list to keep track of most recent directories on each title level
     if LINUX_TUTORIAL not in file:
         curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)]
@@ -258,18 +266,18 @@ def split_text(file, main_title):
 
             title_level = check_for_title(line, in_code_block, curr_dirs)
 
-            # detect codeblocks to make sure titles aren't detected in them
+            # detect codeblocks to make sure titles and beginnings of paragraphs aren't detected in them
             if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
                 in_code_block = not in_code_block
 
             # line is a title with a maximum depth of 4
-            if title_level > 0:
+            if title_level > 0 and SPLIT_ON_TITLES:
                 if after_first_title:
                     paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title])
                     paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir)
                 title = make_valid_title(line[title_level + 1:-1])
 
-                # create an entry for the file in the paragraphs text dictionary
+                # create an entry for the next file in the paragraphs text dictionary
                 paragraphs_text[title] = open_ifs
 
                 after_first_title = True
@@ -278,8 +286,27 @@ def split_text(file, main_title):
                 # reset link_list
                 link_list = []
 
-            # line is not a title
-            elif after_first_title:
+            elif title_level > 0 and not SPLIT_ON_TITLES:
+                paragraphs_text[title] += line[title_level + 1:]
+
+            elif SPLIT_ON_PARAGRAPHS and line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", paragraphs_text[title])) >= MIN_PARAGRAPH_LENGTH:
+                # finish the previous file
+                paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title])
+                paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir)
+
+                # start a new file
+                paragraph_number += 1
+                title = make_valid_title(main_title + "_paragraph_" + str(paragraph_number))
+                subtitle_order.append(title)
+
+                # create an entry for the next file in the paragraphs text dictionary
+                paragraphs_text[title] = open_ifs
+
+                # reset link_list
+                link_list = []
+
+            # line is not a title or the ending of a sufficiently large paragraph
+            elif after_first_title or SPLIT_ON_PARAGRAPHS:
                 line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title)
                 if title in paragraphs_text.keys() and line != "\n":
                     paragraphs_text[title] += line

From 6d9558d1ccf2dd9950586d50b167d74637120e26 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 22 Aug 2024 11:21:42 +0200
Subject: [PATCH 074/152] make two functions for different ways of dividing the
 text

---
 .../chatbot_parser.py                         | 96 +++++++++++++++----
 1 file changed, 80 insertions(+), 16 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index a148e7b2bbd..1f6b82e8a44 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -219,10 +219,15 @@ def split_text(file, main_title):
     :return subtitle_order: list containing all encountered subtitles in order of appearance
     """
 
-    # start of assuming we haven't encountered a title and the first paragraph hasn't appeared yet
+    if SPLIT_ON_TITLES:
+        return split_on_titles(file, main_title)
+    elif SPLIT_ON_PARAGRAPHS:
+        return split_on_paragraphs(file, main_title)
+
+
+def split_on_titles(file, main_title):
+    # start of assuming we haven't encountered a title
     after_first_title = False
-    after_first_paragraph = False
-    paragraph_number = 1
 
     # start of assuming we are not in a code_block
     in_code_block = False
@@ -243,12 +248,6 @@ def split_text(file, main_title):
     # variable to allow for if statements to "continue" over multiple paragraphs
     open_ifs = ""
 
-    # initialise the first paragraph if SPLIT_ON_PARAGRAPH is True
-    if SPLIT_ON_PARAGRAPHS:
-        title = main_title + "_paragraph_" + str(paragraph_number)
-        paragraphs_text[title] = ""
-        subtitle_order.append(title)
-
     # list to keep track of most recent directories on each title level
     if LINUX_TUTORIAL not in file:
         curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)]
@@ -266,18 +265,19 @@ def split_text(file, main_title):
 
             title_level = check_for_title(line, in_code_block, curr_dirs)
 
-            # detect codeblocks to make sure titles and beginnings of paragraphs aren't detected in them
+            # detect codeblocks to make sure titles aren't detected in them
             if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
                 in_code_block = not in_code_block
 
             # line is a title with a maximum depth of 4
-            if title_level > 0 and SPLIT_ON_TITLES:
+            if title_level > 0:
                 if after_first_title:
                     paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title])
-                    paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir)
+                    paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level,
+                                                                last_dir)
                 title = make_valid_title(line[title_level + 1:-1])
 
-                # create an entry for the next file in the paragraphs text dictionary
+                # create an entry for the file in the paragraphs text dictionary
                 paragraphs_text[title] = open_ifs
 
                 after_first_title = True
@@ -286,10 +286,74 @@ def split_text(file, main_title):
                 # reset link_list
                 link_list = []
 
-            elif title_level > 0 and not SPLIT_ON_TITLES:
+            # line is not a title
+            elif after_first_title:
+                line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title)
+                if title in paragraphs_text.keys() and line != "\n":
+                    paragraphs_text[title] += line
+                elif line != "\n":
+                    paragraphs_text[title] = line
+
+    # write metadata for the last file
+    paragraphs_metadata[title] = write_metadata(main_title, title, link_list, title_level, curr_dirs[last_title_level])
+
+    return paragraphs_text, paragraphs_metadata, subtitle_order
+
+
+def split_on_paragraphs(file, main_title):
+    # start of assuming we haven't encountered a title and the first paragraph hasn't appeared yet
+    after_first_title = False
+
+    # first paragraph number
+    paragraph_number = 1
+
+    # start of assuming we are not in a code_block
+    in_code_block = False
+
+    # define initial dictionaries
+    paragraphs_text = {}
+    paragraphs_metadata = {}
+
+    # list to keep track of links in the text
+    link_list = []
+
+    # list to keep track of the order of the subtitles
+    subtitle_order = []
+
+    # variable to keep track of the title level
+    title_level = 0
+
+    # initialise the first paragraph
+    title = main_title + "_paragraph_" + str(paragraph_number)
+    paragraphs_text[title] = ""
+    subtitle_order.append(title)
+
+    # list to keep track of most recent directories on each title level
+    if LINUX_TUTORIAL not in file:
+        curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)]
+    else:
+        curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(MAX_TITLE_DEPTH + 1)]
+
+    with open(file, 'r') as readfile:
+
+        for line in readfile:
+
+            # keep track of title level and directory to write to metadata upon discovering a new subtitle
+            if title_level > 0:
+                last_title_level = title_level
+                last_dir = curr_dirs[last_title_level]
+
+            title_level = check_for_title(line, in_code_block, curr_dirs)
+
+            # detect codeblocks to make sure titles and beginnings of paragraphs aren't detected in them
+            if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
+                in_code_block = not in_code_block
+
+            # line is a title with a maximum depth of 4
+            if title_level > 0:
                 paragraphs_text[title] += line[title_level + 1:]
 
-            elif SPLIT_ON_PARAGRAPHS and line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", paragraphs_text[title])) >= MIN_PARAGRAPH_LENGTH:
+            elif line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", paragraphs_text[title])) >= MIN_PARAGRAPH_LENGTH:
                 # finish the previous file
                 paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title])
                 paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir)
@@ -306,7 +370,7 @@ def split_text(file, main_title):
                 link_list = []
 
             # line is not a title or the ending of a sufficiently large paragraph
-            elif after_first_title or SPLIT_ON_PARAGRAPHS:
+            else:
                 line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title)
                 if title in paragraphs_text.keys() and line != "\n":
                     paragraphs_text[title] += line

From 2c7025a8994fb0a2b0733be82185e706d1109fe9 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 22 Aug 2024 11:23:06 +0200
Subject: [PATCH 075/152] added docstrings to new functions

---
 .../chatbot_parser.py                         | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 1f6b82e8a44..a9797026428 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -212,6 +212,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
 def split_text(file, main_title):
     """
     Function that splits the text into smaller sections and makes them into two dictionaries containing text and metadata
+
     :param file: the filepath of the file to be split
     :param main_title: the main title of the file
     :return paragraphs_text: dictionary containing the split sections of text
@@ -226,6 +227,15 @@ def split_text(file, main_title):
 
 
 def split_on_titles(file, main_title):
+    """
+    Function that splits the text into smaller sections based on the subtitle structure and makes them into two dictionaries containing text and metadata
+
+    :param file: the filepath of the file to be split
+    :param main_title: the main title of the file
+    :return paragraphs_text: dictionary containing the split sections of text
+    :return paragraphs_metadata: dictionary containing the metadata of each split section of text
+    :return subtitle_order: list containing all encountered subtitles in order of appearance
+    """
     # start of assuming we haven't encountered a title
     after_first_title = False
 
@@ -301,6 +311,15 @@ def split_on_titles(file, main_title):
 
 
 def split_on_paragraphs(file, main_title):
+    """
+    Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata
+
+    :param file: the filepath of the file to be split
+    :param main_title: the main title of the file
+    :return paragraphs_text: dictionary containing the split sections of text
+    :return paragraphs_metadata: dictionary containing the metadata of each split section of text
+    :return subtitle_order: list containing all encountered subtitles in order of appearance
+    """
     # start of assuming we haven't encountered a title and the first paragraph hasn't appeared yet
     after_first_title = False
 

From ae99bb96f830da927f4dcded46d12404af8d16c1 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 22 Aug 2024 12:10:18 +0200
Subject: [PATCH 076/152] update test for valid titles

---
 .../HPC_chatbot_preprocessor/tests/test_make_valid_title.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
index aebecddd0f3..fc704c84b31 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
@@ -6,10 +6,10 @@
 @pytest.mark.parametrize("input_string,expected", [
     ("", ""),
     ("A-good-filename-with-dashes", "A-good-filename-with-dashes"),
-    (" A very good filename beginning and ending in a space ", "A very good filename beginning and ending in a space"),
+    (" A very good filename beginning and ending in a space ", "A-very-good-filename-beginning-and-ending-in-a-space"),
     ("-A-very-good-filename-beginning-and-ending-in-a-dash-", "A-very-good-filename-beginning-and-ending-in-a-dash"),
-    ("A filename containing bad characters <>:\"/\\|?*\0", "A filename containing bad characters"),
-    ("A filename ending with {some jinja garbage}", "A filename ending with")
+    ("A filename containing bad characters <>:\"/\\|?*\0", "A-filename-containing-bad-characters"),
+    ("A filename ending with {some jinja garbage}", "A-filename-ending-with")
 ])
 def test_make_valid_title(input_string, expected):
     assert make_valid_title(input_string) == expected

From 084b4210a261e89c81d8e23d31d3e0d1adb7f00b Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 22 Aug 2024 14:17:24 +0200
Subject: [PATCH 077/152] fixed problem with splitting os-specific text
 (metadata not fixed yet)

---
 .../chatbot_parser.py                         | 169 ++++++++++++------
 1 file changed, 117 insertions(+), 52 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index a9797026428..5739f23fb31 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -80,6 +80,13 @@
 # link indicators
 LINK_MARKER = r'§link§link§'
 
+# regex patterns
+IF_MANGLED_PATTERNS = {
+        IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})',
+        ELSE: r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})',
+        ENDIF: r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})'
+    }
+
 
 ################### define functions ###################
 
@@ -243,9 +250,13 @@ def split_on_titles(file, main_title):
     in_code_block = False
 
     # define initial dictionaries
-    paragraphs_text = {}
+    paragraphs_os_free_text = {}
+    paragraphs_os_text = {}
     paragraphs_metadata = {}
 
+    # variable to keep track of the current paragraph
+    current_paragraph = ""
+
     # list to keep track of links in the text
     link_list = []
 
@@ -258,6 +269,12 @@ def split_on_titles(file, main_title):
     # variable to allow for if statements to "continue" over multiple paragraphs
     open_ifs = ""
 
+    # variable to keep track of how many if-statements deep the current line is
+    in_if_statement = 0
+
+    # variable to indicate that previous section was one with if-statements
+    previous_contained_if = False
+
     # list to keep track of most recent directories on each title level
     if LINUX_TUTORIAL not in file:
         curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)]
@@ -268,46 +285,63 @@ def split_on_titles(file, main_title):
 
         for line in readfile:
 
-            # keep track of title level and directory to write to metadata upon discovering a new subtitle
-            if title_level > 0:
-                last_title_level = title_level
-                last_dir = curr_dirs[last_title_level]
+            # detect if-statements starting or ending on the current line
+            in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len(re.findall(IF_MANGLED_PATTERNS[ENDIF], line))
 
-            title_level = check_for_title(line, in_code_block, curr_dirs)
+            # only split up if current line is in a fully non-os-specific section
+            if in_if_statement == 0:
 
-            # detect codeblocks to make sure titles aren't detected in them
-            if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
-                in_code_block = not in_code_block
+                title_level = check_for_title(line, in_code_block, curr_dirs)
 
-            # line is a title with a maximum depth of 4
-            if title_level > 0:
-                if after_first_title:
-                    paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title])
-                    paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level,
-                                                                last_dir)
-                title = make_valid_title(line[title_level + 1:-1])
+                # detect codeblocks to make sure titles aren't detected in them
+                if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
+                    in_code_block = not in_code_block
 
-                # create an entry for the file in the paragraphs text dictionary
-                paragraphs_text[title] = open_ifs
+                # line is a title with a maximum depth of 4
+                if title_level > 0:
+                    if after_first_title:
+                        if previous_contained_if:
+                            paragraphs_os_text[title] = current_paragraph
+                        else:
+                            paragraphs_os_free_text[title] = current_paragraph
+                        paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir)
+                    title = make_valid_title(line[title_level + 1:-1])
 
-                after_first_title = True
-                subtitle_order.append(title)
+                    # create an entry for the file in the paragraphs text dictionary
+                    current_paragraph = open_ifs
 
-                # reset link_list
-                link_list = []
+                    after_first_title = True
+                    subtitle_order.append(title)
+
+                    # reset link_list
+                    link_list = []
+
+                    previous_contained_if = False
 
-            # line is not a title
-            elif after_first_title:
+                # line is not a title
+                elif after_first_title:
+                    line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title)
+                    if line != "\n":
+                        current_paragraph += line
+
+                # keep track of title level and directory to write to metadata upon discovering a new subtitle
+                if title_level > 0:
+                    last_title_level = title_level
+                    last_dir = curr_dirs[last_title_level]
+            else:
+                previous_contained_if = True
                 line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title)
-                if title in paragraphs_text.keys() and line != "\n":
-                    paragraphs_text[title] += line
-                elif line != "\n":
-                    paragraphs_text[title] = line
+                if line != "\n":
+                    current_paragraph += line
 
-    # write metadata for the last file
-    paragraphs_metadata[title] = write_metadata(main_title, title, link_list, title_level, curr_dirs[last_title_level])
+    # write dictionaries for the last file
+    if previous_contained_if:
+        paragraphs_os_text[title] = current_paragraph
+    else:
+        paragraphs_os_free_text[title] = current_paragraph
+    paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, curr_dirs[last_title_level])
 
-    return paragraphs_text, paragraphs_metadata, subtitle_order
+    return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order
 
 
 def split_on_paragraphs(file, main_title):
@@ -438,20 +472,15 @@ def close_ifs(text):
     :return prefix: the prefix for the next section
     """
 
-    patterns = {
-        IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})',
-        ENDIF: r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})',
-        ELSE: r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})'
-    }
-    if_count = len(re.findall(patterns[IF], text.replace("\n", "")))
-    endif_count = len(re.findall(patterns[ENDIF], text.replace("\n", "")))
+    if_count = len(re.findall(IF_MANGLED_PATTERNS[IF], text.replace("\n", "")))
+    endif_count = len(re.findall(IF_MANGLED_PATTERNS[ENDIF], text.replace("\n", "")))
     if IF_MANGLED_PART not in text or if_count == endif_count:
         return text, ""
     else:
 
         # Find all matches for each pattern
         matches = []
-        for key, pattern in patterns.items():
+        for key, pattern in IF_MANGLED_PATTERNS.items():
             for match in re.finditer(pattern, text):
                 matches.append(match)
 
@@ -470,11 +499,11 @@ def close_ifs(text):
             last_if = -1
             last_else = -1
             for i, if_part in enumerate(open_ifs):
-                if re.search(patterns[IF], if_part):
+                if re.search(IF_MANGLED_PATTERNS[IF], if_part):
                     last_if = i
-                elif re.search(patterns[ELSE], if_part):
+                elif re.search(IF_MANGLED_PATTERNS[ELSE], if_part):
                     last_else = i
-                elif re.search(patterns[ENDIF], if_part):
+                elif re.search(IF_MANGLED_PATTERNS[ENDIF], if_part):
                     changed = True
                     del open_ifs[i]
                     if last_else > last_if:
@@ -650,7 +679,7 @@ def make_valid_title(title):
     valid_filename = re.sub(invalid_chars, '', title)
 
     # Strip leading/trailing whitespace
-    valid_filename = valid_filename.strip().strip('-')
+    valid_filename = valid_filename.strip().strip('-').replace(' ', '-')
 
     return valid_filename
 
@@ -700,7 +729,7 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or
         template = Template(paragraphs_text[title])
         text[OS] = template.render(OS=OS)
 
-        # readjust text to correct overcorrections
+        # re-adjust text to correct overcorrections
         text[OS] = re.sub('"' + OS + '"', OS, text[OS])
 
     # check that not all versions are the same
@@ -740,8 +769,11 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
 
     metadata = copy.deepcopy(paragraphs_metadata[title])
 
+    file_title = paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS])
+    file_title = title
+
     # write text file
-    with open(os.path.join(filepath, paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS]) + ".txt"), 'w') as writefile:
+    with open(os.path.join(filepath, file_title + ".txt"), 'w') as writefile:
         if LINKS in paragraphs_metadata[title].keys():
             adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS])
             writefile.write(adapted_text)
@@ -771,7 +803,7 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
         os_part = OS + "/"
     metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-')
 
-    with open(os.path.join(filepath, paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS]) + "_metadata.json"), 'w') as writefile:
+    with open(os.path.join(filepath, file_title + "_metadata.json"), 'w') as writefile:
         json.dump(metadata, writefile, indent=4)
 
     paragraph_numbers[OS] += 1
@@ -803,6 +835,39 @@ def insert_links(text, links):
     return text, new_links
 
 
+def split_and_write_os_specific_section(text, metadata, subtitle_order, i, paragraph_numbers):
+    # add first subtitle in front of section again
+    text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + text
+
+    # Unmangle if's to use jinja parser
+    text = re.sub(IF_MANGLED_PART, "", text)
+
+    for OS in [LINUX, WINDOWS, MACOS]:
+
+        # slightly alter if-statements to be able to use predefined macros
+        text = re.sub(OS, '"' + OS + '"', text)
+
+        # Use jinja to render a different version of the text for each OS
+        template = Template(text)
+        jinja_text = template.render(OS=OS)
+
+        # re-adjust text to correct overcorrections
+        jinja_text = re.sub('"' + OS + '"', OS, jinja_text)
+
+        with open("jinja_file.txt", 'w') as writefile:
+            writefile.write(jinja_text)
+
+        # split in right way
+        _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text("jinja_file.txt", metadata[MAIN_TITLE])
+
+        # write to files
+        for os_i, os_subtitle in enumerate(os_subtitle_order):
+            filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY])
+            os.makedirs(filepath, exist_ok=True)
+
+            write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, subtitle_order[:i] + os_subtitle_order + subtitle_order[i+1:], os_i + i, filepath, OS, paragraph_numbers)
+
+
 def main():
     """
     main function
@@ -828,7 +893,7 @@ def main():
 
     ################### define loop-invariant variables ###################
 
-    # variable that keeps track of the source directories
+    # constant that keeps track of the source directories
     source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR),
                           os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)]
 
@@ -848,6 +913,7 @@ def main():
     # filenames_generic = {}
     # filenames_linux = {}
     # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md"
+    # filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md"
     # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md"
 
     # for loops over all files
@@ -896,19 +962,18 @@ def main():
             jinja_parser(filename, copy_file)
 
             # split the text in paragraphs
-            paragraphs_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title)
+            paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title)
 
             # for every section, either make the whole section generic, or create an os-specific file for each OS
             for i, subtitle in enumerate(subtitle_order):
-                # print(subtitle)
 
                 # generic
-                if IF_MANGLED_PART not in paragraphs_text[subtitle]:
-                    write_generic_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers)
+                if subtitle in paragraphs_os_free_text.keys():
+                    write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers)
 
                 # os-specific
                 else:
-                    write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers)
+                    split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers)
 
     shutil.rmtree(COPIES, ignore_errors=True)
     shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True)

From cf7f5f0c8a56303c155aea51268abe6ddbfe2944 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 22 Aug 2024 15:33:50 +0200
Subject: [PATCH 078/152] fix for metadata of os-specific sections

---
 .../chatbot_parser.py                         | 93 +++++++++++++------
 1 file changed, 65 insertions(+), 28 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 5739f23fb31..10c61725244 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -62,7 +62,7 @@
 
 # Metadata attributes
 MAIN_TITLE = "main_title"
-SUBTITLE = "subtitle (incorrect in some cases, working on a fix)"
+SUBTITLE = "subtitle"
 TITLE_DEPTH = "title_depth"
 DIRECTORY = "directory"
 LINKS = "links"
@@ -300,11 +300,17 @@ def split_on_titles(file, main_title):
                 # line is a title with a maximum depth of 4
                 if title_level > 0:
                     if after_first_title:
+
+                        # write text of previous file
                         if previous_contained_if:
                             paragraphs_os_text[title] = current_paragraph
                         else:
                             paragraphs_os_free_text[title] = current_paragraph
+
+                        # write metadata of previous file
                         paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir)
+
+                    # make a new title
                     title = make_valid_title(line[title_level + 1:-1])
 
                     # create an entry for the file in the paragraphs text dictionary
@@ -697,11 +703,15 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order,
     :return:
     """
 
-    # make the directory needed for the files that will be written
-    filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY])
-    os.makedirs(filepath, exist_ok=True)
+    if len(paragraphs_text[title]) > 0:
+        # make the directory needed for the files that will be written
+        filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY])
+        os.makedirs(filepath, exist_ok=True)
 
-    write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC, paragraph_numbers=paragraph_numbers)
+        write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC, paragraph_numbers=paragraph_numbers)
+    else:
+        # don't write empty files
+        pass
 
 
 def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers):
@@ -835,7 +845,7 @@ def insert_links(text, links):
     return text, new_links
 
 
-def split_and_write_os_specific_section(text, metadata, subtitle_order, i, paragraph_numbers):
+def split_and_write_os_specific_section(text, metadata, subtitle_order, i, paragraph_numbers, all_metadata):
     # add first subtitle in front of section again
     text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + text
 
@@ -860,12 +870,39 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, i, parag
         # split in right way
         _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text("jinja_file.txt", metadata[MAIN_TITLE])
 
+        # prepare variables to fix metadata
+        total_subtitle_order = subtitle_order[:i] + os_subtitle_order + subtitle_order[i+1:]
+        copy_all_metadata = {**os_specific_metadata, **all_metadata}
+
         # write to files
         for os_i, os_subtitle in enumerate(os_subtitle_order):
-            filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY])
-            os.makedirs(filepath, exist_ok=True)
+            # check that file actually has some content
+            if len(os_specific_text[os_subtitle]) > 0:
+                # add the links to the metadata
+                os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS]
+
+                # fix parent in the metadata
+                parent_i = 0
+                parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1
+                parent = os_specific_metadata[os_subtitle][MAIN_TITLE]
+                while total_subtitle_order[parent_i] != os_subtitle and parent_i != len(total_subtitle_order):
+                    if copy_all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth:
+                        parent = total_subtitle_order[parent_i]
+                    parent_i += 1
+                os_specific_metadata[os_subtitle][PARENT_TITLE] = parent
+
+                # fix directory in the metadata
+                os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(copy_all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE])
+
+                # make a directory to save the files
+                filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY])
+                os.makedirs(filepath, exist_ok=True)
 
-            write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, subtitle_order[:i] + os_subtitle_order + subtitle_order[i+1:], os_i + i, filepath, OS, paragraph_numbers)
+                # write to files
+                write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + i, filepath, OS, paragraph_numbers)
+            else:
+                # don't write empty files
+                pass
 
 
 def main():
@@ -893,27 +930,27 @@ def main():
 
     ################### define loop-invariant variables ###################
 
-    # constant that keeps track of the source directories
-    source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR),
-                          os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)]
-
-    # list of all the filenames
-    filenames_generic = {}
-    filenames_linux = {}
-    for source_directory in source_directories:
-        all_items = os.listdir(source_directory)
-        files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
-        for file in files:
-            if LINUX_TUTORIAL in source_directory:
-                filenames_linux[file] = os.path.join(source_directory, file)
-            else:
-                filenames_generic[file] = os.path.join(source_directory, file)
-
-    # # Temporary variables to test with just one singular file
+    # # constant that keeps track of the source directories
+    # source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR),
+    #                       os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)]
+    #
+    # # list of all the filenames
     # filenames_generic = {}
     # filenames_linux = {}
+    # for source_directory in source_directories:
+    #     all_items = os.listdir(source_directory)
+    #     files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
+    #     for file in files:
+    #         if LINUX_TUTORIAL in source_directory:
+    #             filenames_linux[file] = os.path.join(source_directory, file)
+    #         else:
+    #             filenames_generic[file] = os.path.join(source_directory, file)
+
+    # Temporary variables to test with just one singular file
+    filenames_generic = {}
+    filenames_linux = {}
     # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md"
-    # filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md"
+    filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md"
     # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md"
 
     # for loops over all files
@@ -973,7 +1010,7 @@ def main():
 
                 # os-specific
                 else:
-                    split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers)
+                    split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers, paragraphs_metadata)
 
     shutil.rmtree(COPIES, ignore_errors=True)
     shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True)

From b7c10d3c2764ad91880c5c17aa60d14cd337bc51 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 22 Aug 2024 16:03:52 +0200
Subject: [PATCH 079/152] clean up temporary version

---
 .../chatbot_parser.py                         | 115 +++++++-----------
 1 file changed, 42 insertions(+), 73 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 10c61725244..5c1a4b3facd 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -714,54 +714,6 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order,
         pass
 
 
-def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers):
-    """
-    Function that writes text and metadata of os-specific files
-
-    :param title: title of section
-    :param paragraphs_text: dictionary containing all paragraphs of text
-    :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text
-    :param title_order: list containing all subtitles in order
-    :param title_order_number: order number of the title of the section that is being written
-    :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS
-    :return:
-    """
-    text = {}
-    for OS in [LINUX, WINDOWS, MACOS]:
-
-        # Unmangle if's to use jinja parser
-        paragraphs_text[title] = re.sub(IF_MANGLED_PART, "", paragraphs_text[title])
-
-        # slightly alter if-statements to be able to use predefined macros
-        paragraphs_text[title] = re.sub(OS, '"' + OS + '"', paragraphs_text[title])
-
-        # Use jinja to render a different version of the text for each OS
-        template = Template(paragraphs_text[title])
-        text[OS] = template.render(OS=OS)
-
-        # re-adjust text to correct overcorrections
-        text[OS] = re.sub('"' + OS + '"', OS, text[OS])
-
-    # check that not all versions are the same
-    unique_texts = set(text.values())
-    if len(unique_texts) == 1:
-        paragraphs_text[title] = text[OS]
-        write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers)
-    else:
-        for OS in [LINUX, WINDOWS, MACOS]:
-            # check that file actually has some content
-            if len(text[OS]) > 0:
-                # define the filepath
-                filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title][DIRECTORY])
-                os.makedirs(filepath, exist_ok=True)
-
-                # write the files
-                write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS, paragraph_numbers=paragraph_numbers)
-            else:
-                # don't write empty files
-                pass
-
-
 def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, paragraph_numbers):
     """
     Function to write files to a certain filepath
@@ -845,7 +797,18 @@ def insert_links(text, links):
     return text, new_links
 
 
-def split_and_write_os_specific_section(text, metadata, subtitle_order, i, paragraph_numbers, all_metadata):
+def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, paragraph_numbers, all_metadata):
+    """
+    Function that splits os-specific sections into subtitles, parses them using jinja and writes them away
+
+    :param text: full os specific section
+    :param metadata: metadata generated for the full os specific section
+    :param subtitle_order: order of the subtitles generated by the splitter
+    :param title_order_number: order number of the section
+    :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS
+    :param all_metadata: all metadata generated by the splitter
+    :return:
+    """
     # add first subtitle in front of section again
     text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + text
 
@@ -871,35 +834,39 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, i, parag
         _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text("jinja_file.txt", metadata[MAIN_TITLE])
 
         # prepare variables to fix metadata
-        total_subtitle_order = subtitle_order[:i] + os_subtitle_order + subtitle_order[i+1:]
-        copy_all_metadata = {**os_specific_metadata, **all_metadata}
+        total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:]
+        all_metadata.update(os_specific_metadata)
 
         # write to files
         for os_i, os_subtitle in enumerate(os_subtitle_order):
             # check that file actually has some content
             if len(os_specific_text[os_subtitle]) > 0:
                 # add the links to the metadata
-                os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS]
+                if LINKS in metadata.keys():
+                    os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS]
 
                 # fix parent in the metadata
                 parent_i = 0
                 parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1
                 parent = os_specific_metadata[os_subtitle][MAIN_TITLE]
                 while total_subtitle_order[parent_i] != os_subtitle and parent_i != len(total_subtitle_order):
-                    if copy_all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth:
+                    if all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth:
                         parent = total_subtitle_order[parent_i]
                     parent_i += 1
                 os_specific_metadata[os_subtitle][PARENT_TITLE] = parent
 
                 # fix directory in the metadata
-                os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(copy_all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE])
+                if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]:
+                    os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE])
+                else:
+                    os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE])
 
                 # make a directory to save the files
                 filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY])
                 os.makedirs(filepath, exist_ok=True)
 
                 # write to files
-                write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + i, filepath, OS, paragraph_numbers)
+                write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, paragraph_numbers)
             else:
                 # don't write empty files
                 pass
@@ -930,27 +897,27 @@ def main():
 
     ################### define loop-invariant variables ###################
 
-    # # constant that keeps track of the source directories
-    # source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR),
-    #                       os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)]
-    #
-    # # list of all the filenames
-    # filenames_generic = {}
-    # filenames_linux = {}
-    # for source_directory in source_directories:
-    #     all_items = os.listdir(source_directory)
-    #     files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
-    #     for file in files:
-    #         if LINUX_TUTORIAL in source_directory:
-    #             filenames_linux[file] = os.path.join(source_directory, file)
-    #         else:
-    #             filenames_generic[file] = os.path.join(source_directory, file)
-
-    # Temporary variables to test with just one singular file
+    # constant that keeps track of the source directories
+    source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR),
+                          os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)]
+
+    # list of all the filenames
     filenames_generic = {}
     filenames_linux = {}
+    for source_directory in source_directories:
+        all_items = os.listdir(source_directory)
+        files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
+        for file in files:
+            if LINUX_TUTORIAL in source_directory:
+                filenames_linux[file] = os.path.join(source_directory, file)
+            else:
+                filenames_generic[file] = os.path.join(source_directory, file)
+
+    # # Temporary variables to test with just one singular file
+    # filenames_generic = {}
+    # filenames_linux = {}
     # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md"
-    filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md"
+    # filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md"
     # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md"
 
     # for loops over all files
@@ -1012,8 +979,10 @@ def main():
                 else:
                     split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers, paragraphs_metadata)
 
+    # clean up temporary directories and files
     shutil.rmtree(COPIES, ignore_errors=True)
     shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True)
+    os.remove("jinja_file.txt")
 
 
 ################### run the script ###################

From 4a441f34ca1ad6f296817fface9c86fe76585250 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 22 Aug 2024 16:23:57 +0200
Subject: [PATCH 080/152] added command line options for custom macros

---
 .../chatbot_parser.py                         | 32 +++++++++++++++----
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 5c1a4b3facd..e15a76318c4 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -1,3 +1,4 @@
+import argparse
 import copy
 import json
 import os
@@ -9,13 +10,7 @@
 from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template
 
 #################### define macro's ####################
-# customizable macros
-MIN_PARAGRAPH_LENGTH = 160
-MAX_TITLE_DEPTH = 4
-INCLUDE_LINKS_IN_PLAINTEXT = False
-SPLIT_ON_TITLES = True
-SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES
-DEEP_DIRECTORIES = True and SPLIT_ON_TITLES  # Should always be False if SPLIT_ON_TITLES is False
+# customizable macros (default values are defined at the bottom of the script)
 
 # directories
 PARSED_MDS = "parsed_mds"
@@ -987,6 +982,29 @@ def main():
 
 ################### run the script ###################
 if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot")
+
+    # adding command-line options
+
+    parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)")
+    parser.add_argument("-pl", "--paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)")
+    parser.add_argument("-td", "--title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)")
+    parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts")
+
+    args = parser.parse_args()
+
+    SPLIT_ON_TITLES = bool(args.split_on_titles)
+    MIN_PARAGRAPH_LENGTH = args.paragraph_length
+    MAX_TITLE_DEPTH = args.title_depth
+    INCLUDE_LINKS_IN_PLAINTEXT = args.links
+    SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES
+    DEEP_DIRECTORIES = True and SPLIT_ON_TITLES  # Should always be False if SPLIT_ON_TITLES is False
+
+    print(SPLIT_ON_TITLES)
+    print(MIN_PARAGRAPH_LENGTH)
+    print(MAX_TITLE_DEPTH)
+    print(INCLUDE_LINKS_IN_PLAINTEXT)
+
     print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
     main()
     print("Parsing finished successfully")

From 662134fbf7b7bfd53a358f40d43c5a329fd5bab8 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 22 Aug 2024 16:25:34 +0200
Subject: [PATCH 081/152] small fix to macros

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index e15a76318c4..0f7345e8149 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -96,7 +96,7 @@ def check_for_title(line, in_code_block, curr_dirs):
     """
     # detect titles
     match = re.match(r'^#+ ', line)
-    if match and len(match.group(0)) <= 5 and not in_code_block:
+    if match and len(match.group(0)) <= MAX_TITLE_DEPTH + 1 and not in_code_block:
         title_length = len(match.group(0)) - 1
         if DEEP_DIRECTORIES:
             curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 1:-1].replace(' ', '-')))

From 05eab4ae23dc86f45b6eccbef36e31e8869a30c9 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 22 Aug 2024 16:46:30 +0200
Subject: [PATCH 082/152] clean up test for valid title

---
 scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
index fc704c84b31..225c368477d 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
@@ -1,5 +1,4 @@
 import pytest
-import shutil
 from chatbot_parser import make_valid_title
 
 

From b85a8fba96a7a5bd02dccef7a7f3cae34420f9b1 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 22 Aug 2024 16:55:33 +0200
Subject: [PATCH 083/152] add a test for write_metadata

---
 .../tests/test_write_metadata.py                  | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py
new file mode 100644
index 00000000000..68f1772cb24
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py
@@ -0,0 +1,15 @@
+import pytest
+import os
+from chatbot_parser import write_metadata
+
+
+@pytest.mark.parametrize("main_title,subtitle,links,title_level,directory,output", [
+    ("", "", [], 1, "", {"main_title": "", "subtitle": "", "title_depth": 1, "directory": "", "parent_title": ""}),
+    ("A_very_good_main_title", "An_extremely_good_subtitle", ["the_first.link", "the_second.link"], 2,
+     os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"),
+     {"main_title": "A_very_good_main_title", "subtitle": "An_extremely_good_subtitle", "title_depth": 2,
+      "directory": os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"),
+      "parent_title": "An_awesome_parent_file", "links": {"0": "the_first.link", "1": "the_second.link"}})
+])
+def test_write_metadata(main_title, subtitle, links, title_level, directory, output):
+    assert write_metadata(main_title, subtitle, links, title_level, directory) == output

From 39a3c99f68464b1614da8bbaaa68adac0aeea889 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 23 Aug 2024 11:41:20 +0200
Subject: [PATCH 084/152] added functionality to split on paragraphs

---
 .../chatbot_parser.py                         | 353 +++++++++---------
 1 file changed, 180 insertions(+), 173 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 0f7345e8149..9b6fced3636 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -10,7 +10,13 @@
 from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template
 
 #################### define macro's ####################
-# customizable macros (default values are defined at the bottom of the script)
+# customizable macros (customization made possible at the bottom of the script)
+SPLIT_ON_TITLES = True
+MIN_PARAGRAPH_LENGTH = 160
+MAX_TITLE_DEPTH = 4
+INCLUDE_LINKS_IN_PLAINTEXT = False
+SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES
+DEEP_DIRECTORIES = True and SPLIT_ON_TITLES  # Should always be False if SPLIT_ON_TITLES is False
 
 # directories
 PARSED_MDS = "parsed_mds"
@@ -31,6 +37,7 @@
 WINDOWS = "windows"
 MACOS = "macos"
 GENERIC = "generic"
+LINK_OS = {LINUX: "Linux", WINDOWS: "Windows", MACOS: "macOS"}  # OS needs different capitalisation for use in links
 
 # urls
 REPO_URL = 'https://github.com/hpcugent/vsc_user_docs'
@@ -82,6 +89,11 @@
         ENDIF: r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})'
     }
 
+# filenames (and parts of filenames)
+TEMP_JINJA_FILE = "jinja_file.txt"
+_PARAGRAPH_ = "_paragraph_"
+METADATA_EXTENSION = "_metadata"
+
 
 ################### define functions ###################
 
@@ -211,12 +223,14 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
     return curr_line, linklist
 
 
-def split_text(file, main_title):
+def split_text(file, main_title, current_paragraph_number=-1, OS=GENERIC):
     """
     Function that splits the text into smaller sections and makes them into two dictionaries containing text and metadata
 
     :param file: the filepath of the file to be split
     :param main_title: the main title of the file
+    :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph on paragraph level
+    :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph on paragraph level
     :return paragraphs_text: dictionary containing the split sections of text
     :return paragraphs_metadata: dictionary containing the metadata of each split section of text
     :return subtitle_order: list containing all encountered subtitles in order of appearance
@@ -225,7 +239,7 @@ def split_text(file, main_title):
     if SPLIT_ON_TITLES:
         return split_on_titles(file, main_title)
     elif SPLIT_ON_PARAGRAPHS:
-        return split_on_paragraphs(file, main_title)
+        return split_on_paragraphs(file, main_title, current_paragraph_number, OS)
 
 
 def split_on_titles(file, main_title):
@@ -258,12 +272,6 @@ def split_on_titles(file, main_title):
     # list to keep track of the order of the subtitles
     subtitle_order = []
 
-    # variable to keep track of the title level
-    title_level = 0
-
-    # variable to allow for if statements to "continue" over multiple paragraphs
-    open_ifs = ""
-
     # variable to keep track of how many if-statements deep the current line is
     in_if_statement = 0
 
@@ -309,7 +317,7 @@ def split_on_titles(file, main_title):
                     title = make_valid_title(line[title_level + 1:-1])
 
                     # create an entry for the file in the paragraphs text dictionary
-                    current_paragraph = open_ifs
+                    current_paragraph = ""
 
                     after_first_title = True
                     subtitle_order.append(title)
@@ -345,42 +353,46 @@ def split_on_titles(file, main_title):
     return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order
 
 
-def split_on_paragraphs(file, main_title):
+def split_on_paragraphs(file, main_title, current_paragraph_number=-1, OS=GENERIC):
     """
     Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata
 
     :param file: the filepath of the file to be split
     :param main_title: the main title of the file
+    :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph
+    :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph
     :return paragraphs_text: dictionary containing the split sections of text
     :return paragraphs_metadata: dictionary containing the metadata of each split section of text
     :return subtitle_order: list containing all encountered subtitles in order of appearance
     """
-    # start of assuming we haven't encountered a title and the first paragraph hasn't appeared yet
-    after_first_title = False
-
-    # first paragraph number
-    paragraph_number = 1
-
     # start of assuming we are not in a code_block
     in_code_block = False
 
     # define initial dictionaries
-    paragraphs_text = {}
+    paragraphs_os_free_text = {}
+    paragraphs_os_text = {}
     paragraphs_metadata = {}
 
+    # variable to keep track of the current paragraph
+    current_paragraph = ""
+
     # list to keep track of links in the text
     link_list = []
 
     # list to keep track of the order of the subtitles
     subtitle_order = []
 
-    # variable to keep track of the title level
-    title_level = 0
+    # variable to keep track of how many if-statements deep the current line is
+    in_if_statement = 0
+
+    # variable to indicate that previous section was one with if-statements
+    previous_contained_if = False
+
+    # paragraph number to add to title
+    paragraph_number = 1
 
-    # initialise the first paragraph
-    title = main_title + "_paragraph_" + str(paragraph_number)
-    paragraphs_text[title] = ""
-    subtitle_order.append(title)
+    # metadata title
+    metadata_title = main_title
 
     # list to keep track of most recent directories on each title level
     if LINUX_TUTORIAL not in file:
@@ -392,49 +404,85 @@ def split_on_paragraphs(file, main_title):
 
         for line in readfile:
 
-            # keep track of title level and directory to write to metadata upon discovering a new subtitle
-            if title_level > 0:
-                last_title_level = title_level
-                last_dir = curr_dirs[last_title_level]
+            # detect if-statements starting or ending on the current line
+            in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len(
+                re.findall(IF_MANGLED_PATTERNS[ENDIF], line))
+
+            # only split up if current line is in a fully non-os-specific section
+            if in_if_statement == 0:
+
+                title_level = check_for_title(line, in_code_block, curr_dirs)
+
+                # detect codeblocks to make sure titles aren't detected in them
+                if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
+                    in_code_block = not in_code_block
 
-            title_level = check_for_title(line, in_code_block, curr_dirs)
+                # check whether a new paragraph should be started
+                if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= MIN_PARAGRAPH_LENGTH and not in_code_block:
 
-            # detect codeblocks to make sure titles and beginnings of paragraphs aren't detected in them
-            if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
-                in_code_block = not in_code_block
+                    # create a title for the previous paragraph
+                    if current_paragraph_number == -1:
+                        paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number)
+                    else:
+                        paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number)
+                    paragraph_number += 1
 
-            # line is a title with a maximum depth of 4
-            if title_level > 0:
-                paragraphs_text[title] += line[title_level + 1:]
+                    # write text of previous file
+                    if previous_contained_if:
+                        paragraphs_os_text[paragraph_title] = current_paragraph
+                    else:
+                        paragraphs_os_free_text[paragraph_title] = current_paragraph
 
-            elif line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", paragraphs_text[title])) >= MIN_PARAGRAPH_LENGTH:
-                # finish the previous file
-                paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title])
-                paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir)
+                    # write metadata of previous file
+                    paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir)
+                    subtitle_order.append(paragraph_title)
 
-                # start a new file
-                paragraph_number += 1
-                title = make_valid_title(main_title + "_paragraph_" + str(paragraph_number))
-                subtitle_order.append(title)
+                    # reset the current paragraph
+                    current_paragraph = ""
 
-                # create an entry for the next file in the paragraphs text dictionary
-                paragraphs_text[title] = open_ifs
+                    # reset link_list
+                    link_list = []
 
-                # reset link_list
-                link_list = []
+                    previous_contained_if = False
 
-            # line is not a title or the ending of a sufficiently large paragraph
+                # line is a title with a maximum depth of 4
+                elif title_level > 0:
+
+                    # make a new title
+                    metadata_title = make_valid_title(line[title_level + 1:-1])
+
+                    line, link_list = replace_markdown_markers(line[title_level + 1:], link_list, in_code_block, main_title)
+                    current_paragraph += line
+
+                # line is not a title or the beginning of a new paragraph
+                elif line != "\n" or previous_contained_if:
+                    line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title)
+                    current_paragraph += line
+
+                # keep track of title level and directory to write to metadata upon discovering a new subtitle
+                if title_level > 0:
+                    last_title_level = title_level
+                    last_dir = curr_dirs[last_title_level]
             else:
+                previous_contained_if = True
                 line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title)
-                if title in paragraphs_text.keys() and line != "\n":
-                    paragraphs_text[title] += line
-                elif line != "\n":
-                    paragraphs_text[title] = line
+                current_paragraph += line
 
-    # write metadata for the last file
-    paragraphs_metadata[title] = write_metadata(main_title, title, link_list, title_level, curr_dirs[last_title_level])
+    # create a title for the last paragraph
+    if current_paragraph_number == -1:
+        paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number)
+    else:
+        paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number)
 
-    return paragraphs_text, paragraphs_metadata, subtitle_order
+    # write dictionaries for the last file
+    if previous_contained_if:
+        paragraphs_os_text[paragraph_title] = current_paragraph
+    else:
+        paragraphs_os_free_text[paragraph_title] = current_paragraph
+    paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level])
+    subtitle_order.append(paragraph_title)
+
+    return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order
 
 
 def write_metadata(main_title, subtitle, links, title_level, directory):
@@ -461,63 +509,6 @@ def write_metadata(main_title, subtitle, links, title_level, directory):
     return paragraph_metadata
 
 
-def close_ifs(text):
-    """
-    Function to check whether all if-statements in a section are closed properly. If that is not the case, the function
-    closes all if-statements at the end of the section and returns a prefix for the next section containing all if-statements
-    of the section it is processing. This needs to be done because the start of the next section would also be contained within the
-    last unclosed if-statement of its previous section.
-
-    :param text: the text of the section it checks
-    :return text: the adapted text where all if-statements are closed
-    :return prefix: the prefix for the next section
-    """
-
-    if_count = len(re.findall(IF_MANGLED_PATTERNS[IF], text.replace("\n", "")))
-    endif_count = len(re.findall(IF_MANGLED_PATTERNS[ENDIF], text.replace("\n", "")))
-    if IF_MANGLED_PART not in text or if_count == endif_count:
-        return text, ""
-    else:
-
-        # Find all matches for each pattern
-        matches = []
-        for key, pattern in IF_MANGLED_PATTERNS.items():
-            for match in re.finditer(pattern, text):
-                matches.append(match)
-
-        # sort the matches according to their start index
-        matches.sort(key=lambda x: x.start())
-
-        # extract the strings from the matches
-        open_ifs = []
-        for match in matches:
-            open_ifs.append(match.group(0))
-
-        # only include the non-closed if-statements
-        changed = True
-        while changed:
-            changed = False
-            last_if = -1
-            last_else = -1
-            for i, if_part in enumerate(open_ifs):
-                if re.search(IF_MANGLED_PATTERNS[IF], if_part):
-                    last_if = i
-                elif re.search(IF_MANGLED_PATTERNS[ELSE], if_part):
-                    last_else = i
-                elif re.search(IF_MANGLED_PATTERNS[ENDIF], if_part):
-                    changed = True
-                    del open_ifs[i]
-                    if last_else > last_if:
-                        del open_ifs[last_else]
-                    del open_ifs[last_if]
-                    break
-
-        # Concatenate all matches into a single string
-        open_ifs = ''.join(open_ifs)
-
-        return text + (r'{' + IF_MANGLED_PART + '% endif %' + IF_MANGLED_PART + '}')*(if_count - endif_count), open_ifs
-
-
 def jinja_parser(filename, copy_location):
     """
     function that let's jinja do its thing to format the files except for the os-related if-statements
@@ -596,7 +587,7 @@ def mangle_os_ifs(line, is_os):
 
         constr_match = re.search(r'\{%.*?%}', match.string)
         if_match = re.search(r'if ', match.group(1))
-        if_os_match = re.search(r'if OS ', match.group(1))
+        if_os_match = re.search(r'if OS', match.group(1))
         endif_match = re.search(r'endif', match.group(1))
         else_match = re.search(r'else', match.group(1))
 
@@ -726,7 +717,6 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
 
     metadata = copy.deepcopy(paragraphs_metadata[title])
 
-    file_title = paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS])
     file_title = title
 
     # write text file
@@ -757,10 +747,10 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
     if OS == GENERIC:
         os_part = ""
     else:
-        os_part = OS + "/"
-    metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-')
+        os_part = LINK_OS[OS] + "/"
+    metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in paragraphs_metadata[title][SUBTITLE] if char.isalnum() or char == '-').strip('-')
 
-    with open(os.path.join(filepath, file_title + "_metadata.json"), 'w') as writefile:
+    with open(os.path.join(filepath, file_title + METADATA_EXTENSION + ".json"), 'w') as writefile:
         json.dump(metadata, writefile, indent=4)
 
     paragraph_numbers[OS] += 1
@@ -804,8 +794,6 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
     :param all_metadata: all metadata generated by the splitter
     :return:
     """
-    # add first subtitle in front of section again
-    text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + text
 
     # Unmangle if's to use jinja parser
     text = re.sub(IF_MANGLED_PART, "", text)
@@ -819,52 +807,74 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
         template = Template(text)
         jinja_text = template.render(OS=OS)
 
-        # re-adjust text to correct overcorrections
-        jinja_text = re.sub('"' + OS + '"', OS, jinja_text)
-
-        with open("jinja_file.txt", 'w') as writefile:
-            writefile.write(jinja_text)
-
-        # split in right way
-        _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text("jinja_file.txt", metadata[MAIN_TITLE])
-
-        # prepare variables to fix metadata
-        total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:]
-        all_metadata.update(os_specific_metadata)
-
-        # write to files
-        for os_i, os_subtitle in enumerate(os_subtitle_order):
-            # check that file actually has some content
-            if len(os_specific_text[os_subtitle]) > 0:
-                # add the links to the metadata
-                if LINKS in metadata.keys():
-                    os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS]
-
-                # fix parent in the metadata
-                parent_i = 0
-                parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1
-                parent = os_specific_metadata[os_subtitle][MAIN_TITLE]
-                while total_subtitle_order[parent_i] != os_subtitle and parent_i != len(total_subtitle_order):
-                    if all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth:
-                        parent = total_subtitle_order[parent_i]
-                    parent_i += 1
-                os_specific_metadata[os_subtitle][PARENT_TITLE] = parent
-
-                # fix directory in the metadata
-                if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]:
-                    os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE])
-                else:
-                    os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE])
+        # add first subtitle in front of section again
+        if len(jinja_text) != 0:
+            jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + jinja_text
 
-                # make a directory to save the files
-                filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY])
-                os.makedirs(filepath, exist_ok=True)
+            # re-adjust text to correct overcorrections
+            jinja_text = re.sub('"' + OS + '"', OS, jinja_text)
+
+            if LINUX_TUTORIAL not in metadata[DIRECTORY]:
+                with open(TEMP_JINJA_FILE, 'w') as writefile:
+                    writefile.write(jinja_text)
+
+                # split in right way
+                _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS)
 
-                # write to files
-                write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, paragraph_numbers)
             else:
-                # don't write empty files
-                pass
+                os.makedirs(LINUX_TUTORIAL, exist_ok=True)
+                with open(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), 'w') as writefile:
+                    writefile.write(jinja_text)
+
+                # split in right way
+                _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), metadata[MAIN_TITLE], current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS)
+
+            # prepare variables to fix metadata
+            total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:]
+            all_metadata.update(os_specific_metadata)
+
+            # write to files
+            for os_i, os_subtitle in enumerate(os_subtitle_order):
+                # check that file actually has some content
+                if len(os_specific_text[os_subtitle]) > 0:
+                    # add the links to the metadata
+                    if LINKS in metadata.keys():
+                        os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS]
+
+                    # fix parent in the metadata
+                    parent_i = 0
+                    parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1
+                    parent = os_specific_metadata[os_subtitle][MAIN_TITLE]
+
+                    while total_subtitle_order[parent_i] != os_subtitle and parent_i != len(total_subtitle_order):
+                        if all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth:
+                            parent = total_subtitle_order[parent_i]
+                        parent_i += 1
+
+                    if SPLIT_ON_PARAGRAPHS and parent != os_specific_metadata[os_subtitle][MAIN_TITLE]:
+                        os_specific_metadata[os_subtitle][PARENT_TITLE] = all_metadata[parent][SUBTITLE]
+                    else:
+                        os_specific_metadata[os_subtitle][PARENT_TITLE] = parent
+
+                    # fix directory in the metadata if needed
+                    if DEEP_DIRECTORIES:
+                        if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]:
+                            os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE])
+                        else:
+                            os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE])
+
+                    # make a directory to save the files
+                    filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY])
+                    os.makedirs(filepath, exist_ok=True)
+
+                    # write to files
+                    write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, paragraph_numbers)
+                else:
+                    # don't write empty files
+                    pass
+        else:
+            # don't split empty texts
+            pass
 
 
 def main():
@@ -913,7 +923,7 @@ def main():
     # filenames_linux = {}
     # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md"
     # filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md"
-    # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md"
+    # filenames_linux["common_pitfalls.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/common_pitfalls.md"
 
     # for loops over all files
     for filenames in [filenames_generic, filenames_linux]:
@@ -977,7 +987,9 @@ def main():
     # clean up temporary directories and files
     shutil.rmtree(COPIES, ignore_errors=True)
     shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True)
-    os.remove("jinja_file.txt")
+    shutil.rmtree(LINUX_TUTORIAL, ignore_errors=True)
+    if os.path.exists(TEMP_JINJA_FILE):
+        os.remove(TEMP_JINJA_FILE)
 
 
 ################### run the script ###################
@@ -987,24 +999,19 @@ def main():
     # adding command-line options
 
     parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)")
-    parser.add_argument("-pl", "--paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)")
-    parser.add_argument("-td", "--title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)")
+    parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)")
+    parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)")
     parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts")
 
     args = parser.parse_args()
 
     SPLIT_ON_TITLES = bool(args.split_on_titles)
-    MIN_PARAGRAPH_LENGTH = args.paragraph_length
-    MAX_TITLE_DEPTH = args.title_depth
+    MIN_PARAGRAPH_LENGTH = args.min_paragraph_length
+    MAX_TITLE_DEPTH = args.max_title_depth
     INCLUDE_LINKS_IN_PLAINTEXT = args.links
     SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES
     DEEP_DIRECTORIES = True and SPLIT_ON_TITLES  # Should always be False if SPLIT_ON_TITLES is False
 
-    print(SPLIT_ON_TITLES)
-    print(MIN_PARAGRAPH_LENGTH)
-    print(MAX_TITLE_DEPTH)
-    print(INCLUDE_LINKS_IN_PLAINTEXT)
-
     print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
     main()
     print("Parsing finished successfully")

From af9e6cca6ead2ded6ac54500e7e84ed26939aa12 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 23 Aug 2024 12:02:22 +0200
Subject: [PATCH 085/152] clean up

---
 .../chatbot_parser.py                         | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 9b6fced3636..ca861b86e81 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -676,7 +676,7 @@ def make_valid_title(title):
     return valid_filename
 
 
-def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers):
+def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number):
     """
     Function that writes text and metadata of a generic (non-os-specific) file
 
@@ -685,7 +685,6 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order,
     :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text
     :param title_order: list containing all subtitles in order
     :param title_order_number: order number of the title of the section that is being written
-    :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS
     :return:
     """
 
@@ -694,13 +693,13 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order,
         filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY])
         os.makedirs(filepath, exist_ok=True)
 
-        write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC, paragraph_numbers=paragraph_numbers)
+        write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC)
     else:
         # don't write empty files
         pass
 
 
-def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, paragraph_numbers):
+def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS):
     """
     Function to write files to a certain filepath
 
@@ -711,7 +710,6 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
     :param title_order_number: order number of the title of the section that is being written
     :param filepath: filepath to write files to
     :param OS: OS to be included in the metadata
-    :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS
     :return:
     """
 
@@ -728,18 +726,22 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
             writefile.write(text)
 
     # write metadata
+    # add previous subtitle
     if title_order_number != 0:
         metadata[PREVIOUS_SUBTITLE] = title_order[title_order_number - 1]
     else:
         metadata[PREVIOUS_SUBTITLE] = None
 
+    # add next subtitle
     if title_order_number != len(title_order) - 1:
         metadata[NEXT_SUBTITLE] = title_order[title_order_number + 1]
     else:
         metadata[NEXT_SUBTITLE] = None
 
+    # add OS
     metadata[METADATA_OS] = OS
 
+    # add reference link
     if bool(LINUX_TUTORIAL in paragraphs_metadata[title][DIRECTORY]):
         linux_part = LINUX_TUTORIAL + "/"
     else:
@@ -750,11 +752,10 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
         os_part = LINK_OS[OS] + "/"
     metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in paragraphs_metadata[title][SUBTITLE] if char.isalnum() or char == '-').strip('-')
 
+    # write metadata to file
     with open(os.path.join(filepath, file_title + METADATA_EXTENSION + ".json"), 'w') as writefile:
         json.dump(metadata, writefile, indent=4)
 
-    paragraph_numbers[OS] += 1
-
 
 def insert_links(text, links):
     """
@@ -782,7 +783,7 @@ def insert_links(text, links):
     return text, new_links
 
 
-def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, paragraph_numbers, all_metadata):
+def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata):
     """
     Function that splits os-specific sections into subtitles, parses them using jinja and writes them away
 
@@ -790,7 +791,6 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
     :param metadata: metadata generated for the full os specific section
     :param subtitle_order: order of the subtitles generated by the splitter
     :param title_order_number: order number of the section
-    :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS
     :param all_metadata: all metadata generated by the splitter
     :return:
     """
@@ -868,7 +868,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
                     os.makedirs(filepath, exist_ok=True)
 
                     # write to files
-                    write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, paragraph_numbers)
+                    write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS)
                 else:
                     # don't write empty files
                     pass
@@ -978,11 +978,11 @@ def main():
 
                 # generic
                 if subtitle in paragraphs_os_free_text.keys():
-                    write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers)
+                    write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i)
 
                 # os-specific
                 else:
-                    split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers, paragraphs_metadata)
+                    split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers)
 
     # clean up temporary directories and files
     shutil.rmtree(COPIES, ignore_errors=True)

From f4163a7d3cb94ab4962f7c24d8a78906064d59a6 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 23 Aug 2024 12:04:32 +0200
Subject: [PATCH 086/152] clean up

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index ca861b86e81..6be841ae2e5 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -994,7 +994,7 @@ def main():
 
 ################### run the script ###################
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot")
+    parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n")
 
     # adding command-line options
 

From 833f96488dacf631782afe14081202021f50e9f0 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 23 Aug 2024 12:12:48 +0200
Subject: [PATCH 087/152] further clean up and added shebang

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 6be841ae2e5..76627065004 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 import argparse
 import copy
 import json
@@ -958,9 +960,6 @@ def main():
             # variable that keeps track of the directories that are used to write in at different levels
             curr_dirs = [filename[:-3] for _ in range(5)]
 
-            # dictionary that keeps track of the paragraph numbers
-            paragraph_numbers = {GENERIC: 1, LINUX: 1, WINDOWS: 1, MACOS: 1}
-
             ################### actually parse the md file ###################
 
             # create directories for the source markdown file
@@ -982,7 +981,7 @@ def main():
 
                 # os-specific
                 else:
-                    split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers)
+                    split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata)
 
     # clean up temporary directories and files
     shutil.rmtree(COPIES, ignore_errors=True)

From 79b1a56d5a4742c5e96663f1b4c0b41fba68728d Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 23 Aug 2024 12:13:50 +0200
Subject: [PATCH 088/152] clean up

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 76627065004..c7dbe205737 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -1010,7 +1010,7 @@ def main():
     INCLUDE_LINKS_IN_PLAINTEXT = args.links
     SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES
     DEEP_DIRECTORIES = True and SPLIT_ON_TITLES  # Should always be False if SPLIT_ON_TITLES is False
-
-    print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
+    if DEEP_DIRECTORIES:
+        print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
     main()
     print("Parsing finished successfully")

From cec154c64ac9cf4bb34cc9e1ccd9f3d96ea656e7 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 23 Aug 2024 13:54:56 +0200
Subject: [PATCH 089/152] added test for if mangler

---
 .../tests/example_files/example_text_1.md     | 31 +++++++++++
 .../if_mangler_1_input.md                     |  4 ++
 .../if_mangler_1_output.md                    |  4 ++
 .../if_mangler_2_input.md                     |  7 +++
 .../if_mangler_2_output.md                    |  7 +++
 .../if_mangler_3_input.md                     |  6 ++
 .../if_mangler_3_output.md                    |  6 ++
 .../if_mangler_4_input.md                     |  4 ++
 .../if_mangler_4_output.md                    |  4 ++
 .../if_mangler_5_input.md                     | 11 ++++
 .../if_mangler_5_output.md                    | 11 ++++
 .../if_mangler_6_input.md                     |  8 +++
 .../if_mangler_6_output.md                    |  8 +++
 .../if_mangler_7_input.md                     |  9 +++
 .../if_mangler_7_output.md                    |  9 +++
 .../if_mangler_test_files/if_mangler_input.md | 55 +++++++++++++++++++
 .../if_mangler_output.md                      | 55 +++++++++++++++++++
 .../tests/test_if_mangler.py                  | 32 +++++++++++
 18 files changed, 271 insertions(+)
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py

diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md
new file mode 100644
index 00000000000..9b810c3f41a
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md
@@ -0,0 +1,31 @@
+# Main title
+
+## Subtitle 1
+
+blablabla
+blablablabla
+
+### Subtitle 2 partly generic
+
+blablabla generic
+blablabla generic
+{% if OS == windows %}blablabla windows
+blablabla windows with a [link](windows.md)
+
+#### Subtitle 3 Windows specific
+
+blablabla windows
+blablablabla windows
+{% else %}blablabla Linux macOS
+blablablabla Linux macOS with a [link](linuxmacos.md)
+
+#### Subtitle 4 Linux and macOS specific
+
+blablabla Linux macOS
+blablablabla Linux macOS
+{% endif %}
+blablabla generic with a [link](generic.md)
+
+## Subtitle 5 generic
+
+blablabla
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md
new file mode 100644
index 00000000000..6a74b3c0181
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md
@@ -0,0 +1,4 @@
+test1: OS_IF
+{% if OS == windows %}
+test1
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md
new file mode 100644
index 00000000000..2f9cdc38294
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md
@@ -0,0 +1,4 @@
+test1: OS_IF
+{-if-% if OS == windows %-if-}
+test1
+{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md
new file mode 100644
index 00000000000..360a4a59ba3
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md
@@ -0,0 +1,7 @@
+test2: OS_IF in NON_OS_IF
+{% if site == Gent %}
+test2
+{% if OS == windows %}
+test2
+{% endif %}
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md
new file mode 100644
index 00000000000..798dcf6db24
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md
@@ -0,0 +1,7 @@
+test2: OS_IF in NON_OS_IF
+{% if site == Gent %}
+test2
+{-if-% if OS == windows %-if-}
+test2
+{-if-% endif %-if-}
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md
new file mode 100644
index 00000000000..d93125a5971
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md
@@ -0,0 +1,6 @@
+test3: OS_IF with else
+{% if OS == linux %}
+test3
+{% else %}
+test3
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md
new file mode 100644
index 00000000000..02141961338
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md
@@ -0,0 +1,6 @@
+test3: OS_IF with else
+{-if-% if OS == linux %-if-}
+test3
+{-if-% else %-if-}
+test3
+{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md
new file mode 100644
index 00000000000..cc15fae1df1
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md
@@ -0,0 +1,4 @@
+test4: OS_IF with wrong syntax
+{ if OS == macos }
+test4
+{ endif }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md
new file mode 100644
index 00000000000..cc15fae1df1
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md
@@ -0,0 +1,4 @@
+test4: OS_IF with wrong syntax
+{ if OS == macos }
+test4
+{ endif }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md
new file mode 100644
index 00000000000..bdb288474e2
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md
@@ -0,0 +1,11 @@
+test5: OS_IF in OS_IF
+{% if OS == windows %}
+test5
+{% else %}
+{% if OS == linux %}
+test5
+{% else %}
+test5
+{% endif %}
+test5
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md
new file mode 100644
index 00000000000..10443eb67a4
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md
@@ -0,0 +1,11 @@
+test5: OS_IF in OS_IF
+{-if-% if OS == windows %-if-}
+test5
+{-if-% else %-if-}
+{-if-% if OS == linux %-if-}
+test5
+{-if-% else %-if-}
+test5
+{-if-% endif %-if-}
+test5
+{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md
new file mode 100644
index 00000000000..0731ee3588c
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md
@@ -0,0 +1,8 @@
+test6: NON_OS_IF in OS_IF
+{% if OS == macos %}
+test6
+{% if site == Gent %}
+test6
+{% endif %}
+test6
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md
new file mode 100644
index 00000000000..cd37117cb00
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md
@@ -0,0 +1,8 @@
+test6: NON_OS_IF in OS_IF
+{-if-% if OS == macos %-if-}
+test6
+{% if site == Gent %}
+test6
+{% endif %}
+test6
+{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md
new file mode 100644
index 00000000000..6a72a338527
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md
@@ -0,0 +1,9 @@
+test7: weird spacing and dashes
+	{%if OS == windows %}
+	test7
+{%- else%}
+	test7
+		{% if OS == linux%}
+test7
+	{%-endif %}
+{%endif%}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md
new file mode 100644
index 00000000000..dfe342ebfb1
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md
@@ -0,0 +1,9 @@
+test7: weird spacing and dashes
+	{-if-%if OS == windows %-if-}
+	test7
+{-if-%- else%-if-}
+	test7
+		{-if-% if OS == linux%-if-}
+test7
+	{-if-%-endif %-if-}
+{-if-%endif%-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md
new file mode 100644
index 00000000000..fb8c1f8b539
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md
@@ -0,0 +1,55 @@
+test1: OS_IF
+{% if OS == windows %}
+test1
+{% endif %}
+
+test2: OS_IF in NON_OS_IF
+{% if site == Gent %}
+test2
+{% if OS == windows %}
+test2
+{% endif %}
+{% endif %}
+
+test3: OS_IF with else
+{% if OS == linux %}
+test3
+{% else %}
+test3
+{% endif %}
+
+test4: OS_IF with wrong syntax
+{ if OS == macos }
+test4
+{ endif }
+
+test5: OS_IF in OS_IF
+{% if OS == windows %}
+test5
+{% else %}
+{% if OS == linux %}
+test5
+{% else %}
+test5
+{% endif %}
+test5
+{% endif %}
+
+test6: NON_OS_IF in OS_IF
+{% if OS == macos %}
+test6
+{% if site == Gent %}
+test6
+{% endif %}
+test6
+{% endif %}
+
+test7: weird spacing and dashes
+	{%if OS == windows %}
+	test7
+{%- else%}
+	test7
+		{% if OS == linux%}
+test7
+	{%-endif %}
+{%endif%}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md
new file mode 100644
index 00000000000..796e94348fa
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md
@@ -0,0 +1,55 @@
+test1: OS_IF
+{-if-% if OS == windows %-if-}
+test1
+{-if-% endif %-if-}
+
+test2: OS_IF in NON_OS_IF
+{% if site == Gent %}
+test2
+{-if-% if OS == windows %-if-}
+test2
+{-if-% endif %-if-}
+{% endif %}
+
+test3: OS_IF with else
+{-if-% if OS == linux %-if-}
+test3
+{-if-% else %-if-}
+test3
+{-if-% endif %-if-}
+
+test4: OS_IF with wrong syntax
+{ if OS == macos }
+test4
+{ endif }
+
+test5: OS_IF in OS_IF
+{-if-% if OS == windows %-if-}
+test5
+{-if-% else %-if-}
+{-if-% if OS == linux %-if-}
+test5
+{-if-% else %-if-}
+test5
+{-if-% endif %-if-}
+test5
+{-if-% endif %-if-}
+
+test6: NON_OS_IF in OS_IF
+{-if-% if OS == macos %-if-}
+test6
+{% if site == Gent %}
+test6
+{% endif %}
+test6
+{-if-% endif %-if-}
+
+test7: weird spacing and dashes
+	{-if-%if OS == windows %-if-}
+	test7
+{-if-%- else%-if-}
+	test7
+		{-if-% if OS == linux%-if-}
+test7
+	{-if-%-endif %-if-}
+{-if-%endif%-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
new file mode 100644
index 00000000000..17053fe705c
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
@@ -0,0 +1,32 @@
+import pytest
+import os
+import shutil
+from chatbot_parser import mangle_ifs
+
+
+@pytest.mark.parametrize("input_file,output_file", [
+    ("if_mangler_1_input.md", "if_mangler_1_output.md"),
+    ("if_mangler_2_input.md", "if_mangler_2_output.md"),
+    ("if_mangler_3_input.md", "if_mangler_3_output.md"),
+    ("if_mangler_4_input.md", "if_mangler_4_output.md"),
+    ("if_mangler_5_input.md", "if_mangler_5_output.md"),
+    ("if_mangler_6_input.md", "if_mangler_6_output.md"),
+    ("if_mangler_7_input.md", "if_mangler_7_output.md")
+])
+def test_if_mangler(input_file, output_file):
+    """Check that mangle_ifs turns input_file into exactly the content of output_file."""
+    test_files = os.path.join("tests", "example_files", "if_mangler_test_files")
+    os.makedirs("if_mangled_files", exist_ok=True)
+    try:
+        # the mangled copy is expected at if_mangled_files/<input_file>
+        mangle_ifs(os.path.join(test_files, input_file), input_file)
+        with open(os.path.join(test_files, output_file), "r") as expected_read_file:
+            expected_lines = expected_read_file.readlines()
+        with open(os.path.join("if_mangled_files", input_file), "r") as actual_read_file:
+            actual_lines = actual_read_file.readlines()
+
+        # compare whole files: zip() stops at the shorter one and hides missing/extra lines
+        assert actual_lines == expected_lines
+    finally:
+        # remove directory even when the assertion fails
+        shutil.rmtree("if_mangled_files", ignore_errors=True)

From 2f4a277677ea9f20e8bad455b0e583dd1bf5b028 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 23 Aug 2024 16:26:10 +0200
Subject: [PATCH 090/152] clean up

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index c7dbe205737..d91cd0df7d7 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -890,17 +890,9 @@ def main():
     shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True)
 
     # make the necessary directories
-    if not os.path.exists(COPIES):
-        os.mkdir(COPIES)
-
-    if not os.path.exists(os.path.join(COPIES, LINUX_TUTORIAL)):
-        os.mkdir(os.path.join(COPIES, LINUX_TUTORIAL))
-
-    if not os.path.exists(PARSED_MDS):
-        os.mkdir(PARSED_MDS)
-
-    if not os.path.exists(IF_MANGLED_FILES):
-        os.mkdir(IF_MANGLED_FILES)
+    for directory in [COPIES, os.path.join(COPIES, LINUX_TUTORIAL), PARSED_MDS, IF_MANGLED_FILES]:
+        if not os.path.exists(directory):
+            os.makedirs(directory)
 
     ################### define loop-invariant variables ###################
 

From cd0c8ebad9ddc2ec25ef987d88d945215cdf5070 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 23 Aug 2024 17:20:20 +0200
Subject: [PATCH 091/152] clean up customizable options

---
 .../chatbot_parser.py                         | 132 ++++++++++--------
 1 file changed, 72 insertions(+), 60 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index d91cd0df7d7..c262f112759 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -12,13 +12,13 @@
 from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template
 
 #################### define macro's ####################
-# customizable macros (customization made possible at the bottom of the script)
-SPLIT_ON_TITLES = True
-MIN_PARAGRAPH_LENGTH = 160
-MAX_TITLE_DEPTH = 4
-INCLUDE_LINKS_IN_PLAINTEXT = False
-SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES
-DEEP_DIRECTORIES = True and SPLIT_ON_TITLES  # Should always be False if SPLIT_ON_TITLES is False
+# options
+SPLIT_ON_TITLES = "SPLIT_ON_TITLES"
+MIN_PARAGRAPH_LENGTH = "MIN_PARAGRAPH_LENGTH"
+MAX_TITLE_DEPTH = "MAX_TITLE_DEPTH"
+INCLUDE_LINKS_IN_PLAINTEXT = "INCLUDE_LINKS_IN_PLAINTEXT"
+SPLIT_ON_PARAGRAPHS = "SPLIT_ON_PARAGRAPHS"
+DEEP_DIRECTORIES = "DEEP_DIRECTORIES"
 
 # directories
 PARSED_MDS = "parsed_mds"
@@ -99,24 +99,25 @@
 
 ################### define functions ###################
 
-def check_for_title(line, in_code_block, curr_dirs):
+def check_for_title(line, in_code_block, curr_dirs, options):
     """
     function that checks for titles in the current line. Used by split_text to split the text among the subtitles
 
     :param line: the current line to be checked for a title
     :param in_code_block: boolean indicating whether the current line is part of a codeblock to be sure comments aren't counted as titles
     :param curr_dirs: the current working directories for each level of subtitle, to be updated when a new title is found
+    :param options: dictionary containing the options given by the user
     :return title_length: The amount of hashtags in front of the title on the current line
     """
     # detect titles
     match = re.match(r'^#+ ', line)
-    if match and len(match.group(0)) <= MAX_TITLE_DEPTH + 1 and not in_code_block:
+    if match and len(match.group(0)) <= options[MAX_TITLE_DEPTH] + 1 and not in_code_block:
         title_length = len(match.group(0)) - 1
-        if DEEP_DIRECTORIES:
+        if options[DEEP_DIRECTORIES]:
             curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 1:-1].replace(' ', '-')))
 
             # update the higher order current directories
-            for i in range(title_length + 1, MAX_TITLE_DEPTH + 1):
+            for i in range(title_length + 1, options[MAX_TITLE_DEPTH] + 1):
                 curr_dirs[i] = curr_dirs[title_length]
 
         return title_length
@@ -225,12 +226,13 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
     return curr_line, linklist
 
 
-def split_text(file, main_title, current_paragraph_number=-1, OS=GENERIC):
+def split_text(file, main_title, options, current_paragraph_number=-1, OS=GENERIC):
     """
     Function that splits the text into smaller sections and makes them into two dictionaries containing text and metadata
 
     :param file: the filepath of the file to be split
     :param main_title: the main title of the file
+    :param options: dictionary containing the options given by the user
     :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph on paragraph level
     :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph on paragraph level
     :return paragraphs_text: dictionary containing the split sections of text
@@ -238,18 +240,19 @@ def split_text(file, main_title, current_paragraph_number=-1, OS=GENERIC):
     :return subtitle_order: list containing all encountered subtitles in order of appearance
     """
 
-    if SPLIT_ON_TITLES:
-        return split_on_titles(file, main_title)
-    elif SPLIT_ON_PARAGRAPHS:
-        return split_on_paragraphs(file, main_title, current_paragraph_number, OS)
+    if options[SPLIT_ON_TITLES]:
+        return split_on_titles(file, main_title, options)
+    elif options[SPLIT_ON_PARAGRAPHS]:
+        return split_on_paragraphs(file, main_title, options, current_paragraph_number, OS)
 
 
-def split_on_titles(file, main_title):
+def split_on_titles(file, main_title, options):
     """
     Function that splits the text into smaller sections based on the subtitle structure and makes them into two dictionaries containing text and metadata
 
     :param file: the filepath of the file to be split
     :param main_title: the main title of the file
+    :param options: dictionary containing the options given by the user
     :return paragraphs_text: dictionary containing the split sections of text
     :return paragraphs_metadata: dictionary containing the metadata of each split section of text
     :return subtitle_order: list containing all encountered subtitles in order of appearance
@@ -282,9 +285,9 @@ def split_on_titles(file, main_title):
 
     # list to keep track of most recent directories on each title level
     if LINUX_TUTORIAL not in file:
-        curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)]
+        curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)]
     else:
-        curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(MAX_TITLE_DEPTH + 1)]
+        curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(options[MAX_TITLE_DEPTH] + 1)]
 
     with open(file, 'r') as readfile:
 
@@ -296,7 +299,7 @@ def split_on_titles(file, main_title):
             # only split up if current line is in a fully non-os-specific section
             if in_if_statement == 0:
 
-                title_level = check_for_title(line, in_code_block, curr_dirs)
+                title_level = check_for_title(line, in_code_block, curr_dirs, options)
 
                 # detect codeblocks to make sure titles aren't detected in them
                 if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
@@ -355,12 +358,13 @@ def split_on_titles(file, main_title):
     return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order
 
 
-def split_on_paragraphs(file, main_title, current_paragraph_number=-1, OS=GENERIC):
+def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, OS=GENERIC):
     """
     Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata
 
     :param file: the filepath of the file to be split
     :param main_title: the main title of the file
+    :param options: dictionary containing the options given by the user
     :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph
     :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph
     :return paragraphs_text: dictionary containing the split sections of text
@@ -398,9 +402,9 @@ def split_on_paragraphs(file, main_title, current_paragraph_number=-1, OS=GENERI
 
     # list to keep track of most recent directories on each title level
     if LINUX_TUTORIAL not in file:
-        curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)]
+        curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)]
     else:
-        curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(MAX_TITLE_DEPTH + 1)]
+        curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(options[MAX_TITLE_DEPTH] + 1)]
 
     with open(file, 'r') as readfile:
 
@@ -413,14 +417,14 @@ def split_on_paragraphs(file, main_title, current_paragraph_number=-1, OS=GENERI
             # only split up if current line is in a fully non-os-specific section
             if in_if_statement == 0:
 
-                title_level = check_for_title(line, in_code_block, curr_dirs)
+                title_level = check_for_title(line, in_code_block, curr_dirs, options)
 
                 # detect codeblocks to make sure titles aren't detected in them
                 if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
                     in_code_block = not in_code_block
 
                 # check whether a new paragraph should be started
-                if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= MIN_PARAGRAPH_LENGTH and not in_code_block:
+                if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= options[MIN_PARAGRAPH_LENGTH] and not in_code_block:
 
                     # create a title for the previous paragraph
                     if current_paragraph_number == -1:
@@ -678,7 +682,7 @@ def make_valid_title(title):
     return valid_filename
 
 
-def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number):
+def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, options):
     """
     Function that writes text and metadata of a generic (non-os-specific) file
 
@@ -687,6 +691,7 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order,
     :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text
     :param title_order: list containing all subtitles in order
     :param title_order_number: order number of the title of the section that is being written
+    :param options: dictionary containing the options given by the user
     :return:
     """
 
@@ -695,13 +700,13 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order,
         filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY])
         os.makedirs(filepath, exist_ok=True)
 
-        write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC)
+        write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options)
     else:
         # don't write empty files
         pass
 
 
-def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS):
+def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, options):
     """
     Function to write files to a certain filepath
 
@@ -712,6 +717,7 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
     :param title_order_number: order number of the title of the section that is being written
     :param filepath: filepath to write files to
     :param OS: OS to be included in the metadata
+    :param options: dictionary containing the options given by the user
     :return:
     """
 
@@ -722,7 +728,7 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
     # write text file
     with open(os.path.join(filepath, file_title + ".txt"), 'w') as writefile:
         if LINKS in paragraphs_metadata[title].keys():
-            adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS])
+            adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS], options)
             writefile.write(adapted_text)
         else:
             writefile.write(text)
@@ -759,12 +765,13 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
         json.dump(metadata, writefile, indent=4)
 
 
-def insert_links(text, links):
+def insert_links(text, links, options):
     """
     Function that inserts links in the plaintext or takes out the references to the links depending on the value of INCLUDE_LINKS_IN_PLAINTEXT
 
     :param text: The plaintext that needs to be adapted
     :param links: The links that might need to be inserted
+    :param options: dictionary containing the options given by the user
     :return text: The adapted plaintext
     :return links: The links that were actually present in the text
     """
@@ -773,7 +780,7 @@ def insert_links(text, links):
     new_links = {}
     for link_number in re.finditer(LINK_MARKER + r'([0-9]*?)' + LINK_MARKER, text):
         present_links.append(link_number.group(1))
-        if INCLUDE_LINKS_IN_PLAINTEXT:
+        if options[INCLUDE_LINKS_IN_PLAINTEXT]:
             text = re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, " " + links[link_number.group(1)] + " ", text)
         else:
             text = re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, "", text)
@@ -785,7 +792,7 @@ def insert_links(text, links):
     return text, new_links
 
 
-def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata):
+def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata, options):
     """
     Function that splits os-specific sections into subtitles, parses them using jinja and writes them away
 
@@ -794,6 +801,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
     :param subtitle_order: order of the subtitles generated by the splitter
     :param title_order_number: order number of the section
     :param all_metadata: all metadata generated by the splitter
+    :param options: dictionary containing the options given by the user
     :return:
     """
 
@@ -821,7 +829,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
                     writefile.write(jinja_text)
 
                 # split in right way
-                _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS)
+                _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS)
 
             else:
                 os.makedirs(LINUX_TUTORIAL, exist_ok=True)
@@ -829,7 +837,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
                     writefile.write(jinja_text)
 
                 # split in right way
-                _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), metadata[MAIN_TITLE], current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS)
+                _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), metadata[MAIN_TITLE], options, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS)
 
             # prepare variables to fix metadata
             total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:]
@@ -853,13 +861,13 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
                             parent = total_subtitle_order[parent_i]
                         parent_i += 1
 
-                    if SPLIT_ON_PARAGRAPHS and parent != os_specific_metadata[os_subtitle][MAIN_TITLE]:
+                    if options[SPLIT_ON_PARAGRAPHS] and parent != os_specific_metadata[os_subtitle][MAIN_TITLE]:
                         os_specific_metadata[os_subtitle][PARENT_TITLE] = all_metadata[parent][SUBTITLE]
                     else:
                         os_specific_metadata[os_subtitle][PARENT_TITLE] = parent
 
                     # fix directory in the metadata if needed
-                    if DEEP_DIRECTORIES:
+                    if options[DEEP_DIRECTORIES]:
                         if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]:
                             os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE])
                         else:
@@ -870,7 +878,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
                     os.makedirs(filepath, exist_ok=True)
 
                     # write to files
-                    write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS)
+                    write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options)
                 else:
                     # don't write empty files
                     pass
@@ -884,6 +892,28 @@ def main():
     main function
     :return:
     """
+    parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n")
+
+    # adding command-line options
+
+    parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)")
+    parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)")
+    parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)")
+    parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts")
+    parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is set to one")
+
+    args = parser.parse_args()
+
+    options = {SPLIT_ON_TITLES: bool(args.split_on_titles),
+               SPLIT_ON_PARAGRAPHS: not args.split_on_titles,
+               MIN_PARAGRAPH_LENGTH: args.min_paragraph_length,
+               MAX_TITLE_DEPTH: args.max_title_depth,
+               INCLUDE_LINKS_IN_PLAINTEXT: args.links,
+               DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles}
+
+    if options[DEEP_DIRECTORIES]:
+        print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
+
     # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason
     shutil.rmtree(PARSED_MDS, ignore_errors=True)
     shutil.rmtree(COPIES, ignore_errors=True)
@@ -962,18 +992,18 @@ def main():
             jinja_parser(filename, copy_file)
 
             # split the text in paragraphs
-            paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title)
+            paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options)
 
             # for every section, either make the whole section generic, or create an os-specific file for each OS
             for i, subtitle in enumerate(subtitle_order):
 
                 # generic
                 if subtitle in paragraphs_os_free_text.keys():
-                    write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i)
+                    write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, options)
 
                 # os-specific
                 else:
-                    split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata)
+                    split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata, options)
 
     # clean up temporary directories and files
     shutil.rmtree(COPIES, ignore_errors=True)
@@ -982,27 +1012,9 @@ def main():
     if os.path.exists(TEMP_JINJA_FILE):
         os.remove(TEMP_JINJA_FILE)
 
+    print("Parsing finished successfully")
+
 
 ################### run the script ###################
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n")
-
-    # adding command-line options
-
-    parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)")
-    parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)")
-    parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)")
-    parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts")
-
-    args = parser.parse_args()
-
-    SPLIT_ON_TITLES = bool(args.split_on_titles)
-    MIN_PARAGRAPH_LENGTH = args.min_paragraph_length
-    MAX_TITLE_DEPTH = args.max_title_depth
-    INCLUDE_LINKS_IN_PLAINTEXT = args.links
-    SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES
-    DEEP_DIRECTORIES = True and SPLIT_ON_TITLES  # Should always be False if SPLIT_ON_TITLES is False
-    if DEEP_DIRECTORIES:
-        print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
     main()
-    print("Parsing finished successfully")

From 3be262a84c2a574239a554a08b1e760322b470ee Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 09:57:58 +0200
Subject: [PATCH 092/152] further adapt the script to be able to test it

---
 .../chatbot_parser.py                         | 41 ++++++++++---------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index c262f112759..951fea42302 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -887,29 +887,11 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
             pass
 
 
-def main():
+def main(options):
     """
     main function
     :return:
     """
-    parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n")
-
-    # adding command-line options
-
-    parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)")
-    parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)")
-    parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)")
-    parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts")
-    parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is set to one")
-
-    args = parser.parse_args()
-
-    options = {SPLIT_ON_TITLES: bool(args.split_on_titles),
-               SPLIT_ON_PARAGRAPHS: not args.split_on_titles,
-               MIN_PARAGRAPH_LENGTH: args.min_paragraph_length,
-               MAX_TITLE_DEPTH: args.max_title_depth,
-               INCLUDE_LINKS_IN_PLAINTEXT: args.links,
-               DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles}
 
     if options[DEEP_DIRECTORIES]:
         print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
@@ -1017,4 +999,23 @@ def main():
 
 ################### run the script ###################
 if __name__ == '__main__':
-    main()
+    parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n")
+
+    # adding command-line options
+
+    parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)")
+    parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)")
+    parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)")
+    parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts")
+    parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is set to one")
+
+    args = parser.parse_args()
+
+    options = {SPLIT_ON_TITLES: bool(args.split_on_titles),
+               SPLIT_ON_PARAGRAPHS: not args.split_on_titles,
+               MIN_PARAGRAPH_LENGTH: args.min_paragraph_length,
+               MAX_TITLE_DEPTH: args.max_title_depth,
+               INCLUDE_LINKS_IN_PLAINTEXT: args.links,
+               DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles}
+    
+    main(options)

From 1d32aab468c7d3698c69761c0783efd99196cdf1 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 10:11:32 +0200
Subject: [PATCH 093/152] make changes to usage in command line to be more
 intuitive

---
 .../chatbot_parser.py                         | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 951fea42302..e0741a9a347 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -1003,19 +1003,19 @@ def main(options):
 
     # adding command-line options
 
-    parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)")
-    parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)")
-    parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)")
+    parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.")
+    parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is disabled (default: 160)")
+    parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)")
     parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts")
-    parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is set to one")
+    parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is enabled")
 
     args = parser.parse_args()
 
-    options = {SPLIT_ON_TITLES: bool(args.split_on_titles),
-               SPLIT_ON_PARAGRAPHS: not args.split_on_titles,
-               MIN_PARAGRAPH_LENGTH: args.min_paragraph_length,
-               MAX_TITLE_DEPTH: args.max_title_depth,
-               INCLUDE_LINKS_IN_PLAINTEXT: args.links,
-               DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles}
-    
-    main(options)
+    options_dict = {SPLIT_ON_TITLES: args.split_on_titles,
+                    SPLIT_ON_PARAGRAPHS: not args.split_on_titles,
+                    MIN_PARAGRAPH_LENGTH: args.min_paragraph_length,
+                    MAX_TITLE_DEPTH: args.max_title_depth,
+                    INCLUDE_LINKS_IN_PLAINTEXT: args.links,
+                    DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles}
+
+    main(options_dict)

From 5902c96c19985f4225a34f7d081e294482bedcce Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 10:37:51 +0200
Subject: [PATCH 094/152] first revised version of the README

---
 scripts/HPC_chatbot_preprocessor/README.md | 67 ++++++++++++++++------
 1 file changed, 48 insertions(+), 19 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index 55996e0bef5..86bea6b9ed7 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -1,6 +1,44 @@
 # Chatbot parser
 
-`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory as input for a chatbot. 
+`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory as input for a chatbot.
+
+## Usage
+
+The script can be run in a shell environment with the following command:
+
+```shell
+python chatbot_parser.py
+```
+
+This command has the following possible options:
+
+```shell
+chatbot_parser.py [-h] [-st SPLIT_ON_TITLES] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd]
+```
+
+### `h`/`help`
+
+Display the help message
+
+### `st`/`split_on_titles`
+
+Including this option will split the source files based on the titles and subtitles in the markdown text. Not including this option will split the text on paragraphs with a certain minimum length.
+
+### `pl`/`min_paragraph_length`
+
+This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 160 characters. This option only works if `split_on_titles` is not enabled.
+
+### `td`/`max_title_depth`
+
+This option allows the user to configure the maximum "title depth" (the number of `#` characters in front of the title) to be used as borders between sections if `split_on_titles` is enabled. The default value is 4.
+
+### `l`/`links`
+
+Some of the sourcefiles might contain links. Including this option will retain the links in the plaintext. If this option is not included, the links will be dropped from the plaintext.
+
+### `dd`/`deep_directories`
+
+Including this option will make the script generate a "deep directory" where every title encountered will be made into a subdirectory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). This option only works if `split_on_titles` is enabled.
 
 ## Generated file structure
 
@@ -11,22 +49,17 @@ The generated directory structure is written as a subdirectory of `parsed_mds`.
 
 Within `os_specific` a further distinction is made for each of the three possible operating systems included in the documentation.
 
-These subdirectories then contain a subdirectory for each individual markdown sourcefile. In the file specific subdirectories, further divisions are made according to the titles and subtitles found in that markdown sourcefile. 
-
-Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and at the end a reference link to the corresponding part of the documentation website on <docs.hpc.ugent.be>.
+Both the generic and each of the three os-specific directories then contain a directory for each source file. 
 
-## Requirements
+If the option `deep_directories` is not enabled, all paragraphs of the source file and their corresponding metadata will be saved in this directory. The (processed) plaintext of the paragraph is written to a `.txt` file and the metadata is written to a `.json` file.
 
-- The required Python packages are listed in `requirements.txt`
-- [Pandoc](https://pandoc.org/installing.html) must be installed and must be added to the system PATH
+If the option `deep_directories` is enabled, the directory of each source file will contain a subdirectory structure corresponding to the structure of the subtitles at different levels in the source file. Each subtitle in the source file corresponds to a directory nested in the directory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). 
 
-## Usage
+Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and a `.json` file with the metadata of that section.
 
-The script can be ran in a shell environment with the following command:
+## Requirements
 
-```shell
-python chatbot_parser.py
-```
+- The required Python packages are listed in `requirements.txt`
 
 ## Restrictions on source-files
 
@@ -102,13 +135,9 @@ endif
 
 This will also result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it.
 
-### Allowed html syntax
+### html syntax
 
-The script contains a list of html syntax keywords it filters out. If more html syntax keywords are used in the future, it suffices to add them to this list to adapt the script to filter them out. The current list is:
-```
-["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"]
-```
-The script is also adapted to take into consideration structures like `<a href="link">` and retain the link.
+The input shouldn't contain any html syntax. While some failsafes are in place, the script isn't made with the use case of handling html syntax in mind. 
 
 ### Markdown comments
 
@@ -121,4 +150,4 @@ Any comments within the markdown files (for example TODO's) should follow the fo
 
 ### Long filepaths
 
-Due to the nature of this script, it can generate large directories with very long names. Depending on the operating system, this can cause problems with filepaths being to long, resulting in files not being able to open. A possible fix for this is to make sure the filepath to the script is not too long.
+Due to the nature of this script, it can generate large directories with very long names if `deep_directories` is enabled. Depending on the operating system, this can cause problems with filepaths being too long, resulting in files that cannot be opened. A possible fix for this is to make sure the filepath to where the script is located is not too long. Another solution is lowering the `max_title_depth` or disabling `deep_directories`.

From 6e488005ed15345e34878c7a9ee7944d554a42ef Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 11:00:58 +0200
Subject: [PATCH 095/152] added docstring to main function

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index e0741a9a347..282e1607031 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -890,6 +890,14 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
 def main(options):
     """
     main function
+
+    :param options: dictionary containing the options specified by the user to run the script:
+                    {SPLIT_ON_TITLES: boolean indicating whether to split on titles,
+                    SPLIT_ON_PARAGRAPHS: boolean indicating whether to split on paragraphs (should always be the opposite of SPLIT_ON_TITLES),
+                    MIN_PARAGRAPH_LENGTH: integer representing the minimum length of a paragraph,
+                    MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text,
+                    INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext,
+                    DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not}
     :return:
     """
 

From 0bc440bc71dc0cb2a01fc799db5566c112f0c481 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 11:22:23 +0200
Subject: [PATCH 096/152] include chatbot_prepprocessor

---
 scripts/README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/README.md b/scripts/README.md
index eed5a73e4d5..a88bd42cc46 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -1,3 +1,4 @@
 Scripts that can be used to automatically generate markdown files, can be found here.
 
-* [`available_software`](available_software): script to generate overview of available environment modules;
\ No newline at end of file
+* [`available_software`](available_software): script to generate overview of available environment modules;
+* [`chatbot_preprocessor`](HPC_chatbot_preprocessor): script to generate input files for the chatbot;
\ No newline at end of file

From e6e6023c068b8c512af808cb40d4bd1dd68c1603 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 12:26:38 +0200
Subject: [PATCH 097/152] added options for source and destination directories

---
 scripts/HPC_chatbot_preprocessor/README.md    |  10 +-
 .../chatbot_parser.py                         | 170 ++++++++----------
 2 files changed, 86 insertions(+), 94 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index 86bea6b9ed7..82aaa9b7e3c 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -13,13 +13,21 @@ python chatbot_parser.py
 This command has the following possible options:
 
 ```shell
-chatbot_parser.py [-h] [-st SPLIT_ON_TITLES] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd]
+chatbot_parser.py [-h] -src SOURCE -dst DESTINATION [-st] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd]
 ```
 
 ### `h`/`help`
 
 Display the help message
 
+### `src`/`source`
+
+This is a required option that specifies the source directory of the input files for the script. This location is also used to look for jinja templates when using jinja to parse the source files (such as the `macros` directory within `vsc_user_docs/mkdocs/docs/HPC`).
+
+### `dst`/`destination`
+
+This is a required option that specifies where the output of the script should be written. The script also generates extra intermediate subdirectories, so subdirectories with the following names shouldn't be present in the destination directory: `parsed_mds`, `copies` and `if_mangled_files`. If any of these pose a problem, the name of the intermediate subdirectory used for the script can be changed in the macros at the top of the script.
+
 ### `st`/`split_on_titles`
 
 Including this option will split the source files based on the titles and subtitles in the markdown text. Not including this option will split the text on paragraphs with a certain minimum length.
diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 282e1607031..245c5d68f51 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -13,6 +13,8 @@
 
 #################### define macro's ####################
 # options
+SOURCE_DIRECTORY = "SOURCE_DIRECTORY"
+DESTINATION_DIRECTORY = "DESTINATION_DIRECTORY"
 SPLIT_ON_TITLES = "SPLIT_ON_TITLES"
 MIN_PARAGRAPH_LENGTH = "MIN_PARAGRAPH_LENGTH"
 MAX_TITLE_DEPTH = "MAX_TITLE_DEPTH"
@@ -284,10 +286,7 @@ def split_on_titles(file, main_title, options):
     previous_contained_if = False
 
     # list to keep track of most recent directories on each title level
-    if LINUX_TUTORIAL not in file:
-        curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)]
-    else:
-        curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(options[MAX_TITLE_DEPTH] + 1)]
+    curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)]
 
     with open(file, 'r') as readfile:
 
@@ -515,12 +514,13 @@ def write_metadata(main_title, subtitle, links, title_level, directory):
     return paragraph_metadata
 
 
-def jinja_parser(filename, copy_location):
+def jinja_parser(filename, copy_location, options):
     """
     function that let's jinja do its thing to format the files except for the os-related if-statements
 
     :param filename: the name of the file that needs to be formatted using jinja
     :param copy_location: the location of the file that needs to be formatted using jinja
+    :param options: dictionary containing the options given by the user
     :return:
     """
     # YAML file location
@@ -539,10 +539,10 @@ def jinja_parser(filename, copy_location):
     combined_context = {**words_dict, **additional_context}
 
     # Mangle the OS-related if-statements
-    mangle_ifs(copy_location, filename)
+    mangle_ifs(copy_location, filename, options)
 
     # Use Jinja2 to replace the macros
-    template_loader = ChoiceLoader([FileSystemLoader(searchpath=[IF_MANGLED_FILES, os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR)]), FunctionLoader(load_macros)])
+    template_loader = ChoiceLoader([FileSystemLoader(searchpath=[os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), options[SOURCE_DIRECTORY], os.path.join(options[SOURCE_DIRECTORY], RETURN_DIR)]), FunctionLoader(load_macros)])
     templateEnv = Environment(loader=template_loader)
     template = templateEnv.get_template(filename)
     rendered_content = template.render(combined_context)
@@ -642,18 +642,19 @@ def mangle_os_ifs(line, is_os):
     return line, is_os
 
 
-def mangle_ifs(directory, filename):
+def mangle_ifs(directory, filename, options):
     """
     function that writes the if-mangled version of a file to a location where the jinja parser will use it
 
     :param directory: the directory of the file to be if mangled
     :param filename: the filename of the file to be mangled
+    :param options: dictionary containing the options given by the user
     :return:
     """
     # variable to keep track of latest if-statement scope
     is_os = NON_OS_IF
 
-    with open(os.path.join(IF_MANGLED_FILES,  filename), 'w') as write_file:
+    with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES,  filename), 'w') as write_file:
         with open(directory, 'r') as read_file:
             for line in read_file:
                 new_line, is_os = mangle_os_ifs(line, is_os)
@@ -682,7 +683,7 @@ def make_valid_title(title):
     return valid_filename
 
 
-def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, options):
+def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, options, is_linux_tutorial):
     """
     Function that writes text and metadata of a generic (non-os-specific) file
 
@@ -692,21 +693,22 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order,
     :param title_order: list containing all subtitles in order
     :param title_order_number: order number of the title of the section that is being written
     :param options: dictionary containing the options given by the user
+    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
     :return:
     """
 
     if len(paragraphs_text[title]) > 0:
         # make the directory needed for the files that will be written
-        filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY])
+        filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY])
         os.makedirs(filepath, exist_ok=True)
 
-        write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options)
+        write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options, is_linux_tutorial)
     else:
         # don't write empty files
         pass
 
 
-def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, options):
+def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, options, is_linux_tutorial):
     """
     Function to write files to a certain filepath
 
@@ -718,6 +720,7 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
     :param filepath: filepath to write files to
     :param OS: OS to be included in the metadata
     :param options: dictionary containing the options given by the user
+    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
     :return:
     """
 
@@ -750,7 +753,7 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
     metadata[METADATA_OS] = OS
 
     # add reference link
-    if bool(LINUX_TUTORIAL in paragraphs_metadata[title][DIRECTORY]):
+    if is_linux_tutorial:
         linux_part = LINUX_TUTORIAL + "/"
     else:
         linux_part = ""
@@ -792,7 +795,7 @@ def insert_links(text, links, options):
     return text, new_links
 
 
-def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata, options):
+def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata, options, is_linux_tutorial):
     """
     Function that splits os-specific sections into subtitles, parses them using jinja and writes them away
 
@@ -802,6 +805,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
     :param title_order_number: order number of the section
     :param all_metadata: all metadata generated by the splitter
     :param options: dictionary containing the options given by the user
+    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
     :return:
     """
 
@@ -824,20 +828,11 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
             # re-adjust text to correct overcorrections
             jinja_text = re.sub('"' + OS + '"', OS, jinja_text)
 
-            if LINUX_TUTORIAL not in metadata[DIRECTORY]:
-                with open(TEMP_JINJA_FILE, 'w') as writefile:
-                    writefile.write(jinja_text)
+            with open(TEMP_JINJA_FILE, 'w') as writefile:
+                writefile.write(jinja_text)
 
-                # split in right way
-                _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS)
-
-            else:
-                os.makedirs(LINUX_TUTORIAL, exist_ok=True)
-                with open(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), 'w') as writefile:
-                    writefile.write(jinja_text)
-
-                # split in right way
-                _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), metadata[MAIN_TITLE], options, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS)
+            # split in right way
+            _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS)
 
             # prepare variables to fix metadata
             total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:]
@@ -874,11 +869,11 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
                             os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE])
 
                     # make a directory to save the files
-                    filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY])
+                    filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY])
                     os.makedirs(filepath, exist_ok=True)
 
                     # write to files
-                    write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options)
+                    write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options, is_linux_tutorial)
                 else:
                     # don't write empty files
                     pass
@@ -905,32 +900,27 @@ def main(options):
         print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
 
     # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason
-    shutil.rmtree(PARSED_MDS, ignore_errors=True)
-    shutil.rmtree(COPIES, ignore_errors=True)
-    shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True)
+    shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS), ignore_errors=True)
+    shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True)
+    shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True)
 
     # make the necessary directories
-    for directory in [COPIES, os.path.join(COPIES, LINUX_TUTORIAL), PARSED_MDS, IF_MANGLED_FILES]:
+    for directory in [COPIES, PARSED_MDS, IF_MANGLED_FILES]:
+        directory = os.path.join(options[DESTINATION_DIRECTORY], directory)
         if not os.path.exists(directory):
             os.makedirs(directory)
 
     ################### define loop-invariant variables ###################
 
     # constant that keeps track of the source directories
-    source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR),
-                          os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)]
+    source_directory = options[SOURCE_DIRECTORY]
 
     # list of all the filenames
-    filenames_generic = {}
-    filenames_linux = {}
-    for source_directory in source_directories:
-        all_items = os.listdir(source_directory)
-        files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
-        for file in files:
-            if LINUX_TUTORIAL in source_directory:
-                filenames_linux[file] = os.path.join(source_directory, file)
-            else:
-                filenames_generic[file] = os.path.join(source_directory, file)
+    filenames = {}
+    all_items = os.listdir(source_directory)
+    files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
+    for file in files:
+        filenames[file] = os.path.join(source_directory, file)
 
     # # Temporary variables to test with just one singular file
     # filenames_generic = {}
@@ -940,65 +930,55 @@ def main(options):
     # filenames_linux["common_pitfalls.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/common_pitfalls.md"
 
     # for loops over all files
-    for filenames in [filenames_generic, filenames_linux]:
-        for filename in filenames.keys():
-            ################### define/reset loop specific variables ###################
+    for filename in filenames.keys():
+        ################### define/reset loop specific variables ###################
 
-            # variable that keeps track of whether file is part of the linux tutorial
-            is_linux_tutorial = bool(LINUX_TUTORIAL in filenames[filename])
+        # boolean indicating whether the current file is part of the linux tutorial
+        is_linux_tutorial = bool(LINUX_TUTORIAL in filenames[filename])
 
-            # make a copy of the original file in order to make sure the original does not get altered
-            if is_linux_tutorial:
-                copy_file = os.path.join(COPIES, LINUX_TUTORIAL,  filename)
-            else:
-                copy_file = os.path.join(COPIES, filename)
-            shutil.copyfile(filenames[filename], copy_file)
-
-            # variable that keeps track of the directories that are used to write in at different levels
-            if is_linux_tutorial:
-                root_dir_generic = os.path.join(PARSED_MDS, GENERIC_DIR, LINUX_TUTORIAL)
-                root_dir_os_specific_linux = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, LINUX, LINUX_TUTORIAL)
-                root_dir_os_specific_windows = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS, LINUX_TUTORIAL)
-                root_dir_os_specific_macos = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, MACOS, LINUX_TUTORIAL)
-            else:
-                root_dir_generic = os.path.join(PARSED_MDS, GENERIC_DIR)
-                root_dir_os_specific_linux = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, LINUX)
-                root_dir_os_specific_windows = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS)
-                root_dir_os_specific_macos = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, MACOS)
+        # make a copy of the original file in order to make sure the original does not get altered
+        copy_file = os.path.join(options[DESTINATION_DIRECTORY], COPIES, filename)
+        shutil.copyfile(filenames[filename], copy_file)
 
-            # variable for the main title (needed for reference links)
-            main_title = filename[:-3]
+        # variable that keeps track of the directories that are used to write in at different levels
+        root_dir_generic = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR)
+        root_dir_os_specific_linux = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, LINUX)
+        root_dir_os_specific_windows = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS)
+        root_dir_os_specific_macos = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, MACOS)
 
-            # variable that keeps track of the directories that are used to write in at different levels
-            curr_dirs = [filename[:-3] for _ in range(5)]
+        # variable for the main title (needed for reference links)
+        main_title = filename[:-3]
 
-            ################### actually parse the md file ###################
+        # variable that keeps track of the directories that are used to write in at different levels
+        curr_dirs = [filename[:-3] for _ in range(5)]
 
-            # create directories for the source markdown file
-            for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]:
-                os.makedirs(directory, exist_ok=True)
+        ################### actually parse the md file ###################
 
-            # process the jinja macros
-            jinja_parser(filename, copy_file)
+        # create directories for the source markdown file
+        for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]:
+            os.makedirs(os.path.join(options[DESTINATION_DIRECTORY], directory), exist_ok=True)
 
-            # split the text in paragraphs
-            paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options)
+        # process the jinja macros
+        jinja_parser(filename, copy_file, options)
 
-            # for every section, either make the whole section generic, or create an os-specific file for each OS
-            for i, subtitle in enumerate(subtitle_order):
+        # split the text in paragraphs
+        paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options)
 
-                # generic
-                if subtitle in paragraphs_os_free_text.keys():
-                    write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, options)
+        # for every section, either make the whole section generic, or create an os-specific file for each OS
+        for i, subtitle in enumerate(subtitle_order):
 
-                # os-specific
-                else:
-                    split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata, options)
+            # generic
+            if subtitle in paragraphs_os_free_text.keys():
+                write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, options, is_linux_tutorial)
+
+            # os-specific
+            else:
+                split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata, options, is_linux_tutorial)
 
     # clean up temporary directories and files
-    shutil.rmtree(COPIES, ignore_errors=True)
-    shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True)
-    shutil.rmtree(LINUX_TUTORIAL, ignore_errors=True)
+    shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True)
+    shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True)
+    shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], LINUX_TUTORIAL), ignore_errors=True)
     if os.path.exists(TEMP_JINJA_FILE):
         os.remove(TEMP_JINJA_FILE)
 
@@ -1011,6 +991,8 @@ def main(options):
 
     # adding command-line options
 
+    parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located")
+    parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to")
     parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.")
     parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is disabled (default: 160)")
     parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)")
@@ -1019,7 +1001,9 @@ def main(options):
 
     args = parser.parse_args()
 
-    options_dict = {SPLIT_ON_TITLES: args.split_on_titles,
+    options_dict = {SOURCE_DIRECTORY: args.source,
+                    DESTINATION_DIRECTORY: args.destination,
+                    SPLIT_ON_TITLES: args.split_on_titles,
                     SPLIT_ON_PARAGRAPHS: not args.split_on_titles,
                     MIN_PARAGRAPH_LENGTH: args.min_paragraph_length,
                     MAX_TITLE_DEPTH: args.max_title_depth,

From a6d99d9c724e453c9adb4262b747ddbf01ab711e Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 12:27:20 +0200
Subject: [PATCH 098/152] cleanup

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 245c5d68f51..bfc152cee60 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -922,13 +922,6 @@ def main(options):
     for file in files:
         filenames[file] = os.path.join(source_directory, file)
 
-    # # Temporary variables to test with just one singular file
-    # filenames_generic = {}
-    # filenames_linux = {}
-    # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md"
-    # filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md"
-    # filenames_linux["common_pitfalls.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/common_pitfalls.md"
-
     # for loops over all files
     for filename in filenames.keys():
         ################### define/reset loop specific variables ###################

From 2be834f19ce8729a0d28ef4b89ddeea59b5e398e Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 13:14:22 +0200
Subject: [PATCH 099/152] cleanup

---
 .../HPC_chatbot_preprocessor/chatbot_parser.py    | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index bfc152cee60..26cf15b79a2 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -86,6 +86,9 @@
 # link indicators
 LINK_MARKER = r'§link§link§'
 
+# HTML tags
+HTML_TAGS = ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"]  # make sure these are always lowercase
+
 # regex patterns
 IF_MANGLED_PATTERNS = {
         IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})',
@@ -164,9 +167,8 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
     match = re.findall(r'<(.*?)>', curr_line)
     if match:
         for i, content in enumerate(match):
-            syntax_words = ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"]  # make sure these are always lowercase
-            syntax_words_variations = list(chain.from_iterable([[element, element + "/", "/" + element] for element in syntax_words]))
-            syntax_words_style = [element + " style=.*" for element in syntax_words]
+            html_tags_variations = list(chain.from_iterable([[element, element + "/", "/" + element] for element in HTML_TAGS]))
+            html_tags_style = [element + " style=.*" for element in HTML_TAGS]
 
             # add references for every link of format <a href=...>
             if re.search(r'a href=.*', content):
@@ -175,11 +177,11 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
                 linklist.append(link)
 
             # drop the syntax words
-            elif content.lower() in syntax_words_variations:
+            elif content.lower() in html_tags_variations:
                 curr_line = re.sub(f'<{content}>', "", curr_line)
 
-            # drop the version of the syntax_words followed by " style="
-            elif any(re.match(pattern, content) for pattern in syntax_words_style):
+            # drop the version of the HTML_TAGS followed by " style="
+            elif any(re.match(pattern, content) for pattern in html_tags_style):
                 curr_line = re.sub(r'<.*?>', "", curr_line)
 
             # drop markdown comments
@@ -983,7 +985,6 @@ def main(options):
     parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n")
 
     # adding command-line options
-
     parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located")
     parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to")
     parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.")

From 532543a18785e966a76a830c04055ec46425d20e Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 13:17:07 +0200
Subject: [PATCH 100/152] cleanup

---
 scripts/HPC_chatbot_preprocessor/README.md | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index 82aaa9b7e3c..bc2922aaf5a 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -16,35 +16,37 @@ This command has the following possible options:
 chatbot_parser.py [-h] -src SOURCE -dst DESTINATION [-st] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd]
 ```
 
-### `h`/`help`
+### Options
+
+#### `h`/`help`
 
 Display the help message
 
-### `src`/`source`
+#### `src`/`source`
 
 This is a required option that specifies the source directory of the input files for the script. This location is also used to look for jinja templates when using jinja to parse the source files (such as the `macros` directory within `vsc_user_docs/mkdocs/docs/HPC`).
 
-### `dst`/`destination`
+#### `dst`/`destination`
 
 This is a required option that specifies where the output of the script should be written. The script also generates extra intermediate subdirectories, so subdirectories with the following names shouldn't be present in the destination directory: `parsed_mds`, `copies` and `if_mangled_files`. If any of these pose a problem, the name of the intermediate subdirectory used for the script can be changed in the macros at the top of the script.
 
-### `st`/`split_on_titles`
+#### `st`/`split_on_titles`
 
 Including this option will split the source files based on the titles and subtitles in the markdown text. Not including this option will split the text on paragraphs with a certain minimum length.
 
-### `pl`/`min_paragraph_length`
+#### `pl`/`min_paragraph_length`
 
 This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 160 characters. This options only works if `split_on_titles` is not enabled.
 
-### `td`/`max_title_depth`
+#### `td`/`max_title_depth`
 
 This option allows the user to configure the maximum "title depth" (the amount of `#` in front) to be used as borders between sections if `split_on_titles` is enabled. The default value is 4.
 
-### `l`/`links`
+#### `l`/`links`
 
 Some of the sourcefiles might contain links. Including this option will retain the links in the plaintext. If this option is not included, the links will be dropped from the plaintext.
 
-### `dd`/`deep_directories`
+#### `dd`/`deep_directories`
 
 Including this option will make the script generate a "deep directory" where every title encountered will be made into a subdirectory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). This option only works if `split_on_titles` is enabled.
 

From 107464e57b3581d96130eeea63f7d3390025125e Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 13:42:40 +0200
Subject: [PATCH 101/152] relocate test files

---
 .../if_mangler_test_files/if_mangler_1_input.md                   | 0
 .../if_mangler_test_files/if_mangler_1_output.md                  | 0
 .../if_mangler_test_files/if_mangler_2_input.md                   | 0
 .../if_mangler_test_files/if_mangler_2_output.md                  | 0
 .../if_mangler_test_files/if_mangler_3_input.md                   | 0
 .../if_mangler_test_files/if_mangler_3_output.md                  | 0
 .../if_mangler_test_files/if_mangler_4_input.md                   | 0
 .../if_mangler_test_files/if_mangler_4_output.md                  | 0
 .../if_mangler_test_files/if_mangler_5_input.md                   | 0
 .../if_mangler_test_files/if_mangler_5_output.md                  | 0
 .../if_mangler_test_files/if_mangler_6_input.md                   | 0
 .../if_mangler_test_files/if_mangler_6_output.md                  | 0
 .../if_mangler_test_files/if_mangler_7_input.md                   | 0
 .../if_mangler_test_files/if_mangler_7_output.md                  | 0
 .../if_mangler_test_files/if_mangler_input.md                     | 0
 .../if_mangler_test_files/if_mangler_output.md                    | 0
 .../tests/test_files/test_paragraph_split_1.md                    | 0
 .../example_text_1.md => test_files/test_title_split_1.md}        | 0
 18 files changed, 0 insertions(+), 0 deletions(-)
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_1_input.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_1_output.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_2_input.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_2_output.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_3_input.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_3_output.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_4_input.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_4_output.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_5_input.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_5_output.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_6_input.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_6_output.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_7_input.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_7_output.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_input.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_output.md (100%)
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md
 rename scripts/HPC_chatbot_preprocessor/tests/{example_files/example_text_1.md => test_files/test_title_split_1.md} (100%)

diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md

From dd64381efc3b7156c2905f69aec54572b2ca2c53 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 13:47:22 +0200
Subject: [PATCH 102/152] update arguments of if mangler

---
 scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
index 17053fe705c..4d0dd876103 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
@@ -18,10 +18,10 @@ def test_if_mangler(input_file, output_file):
     os.makedirs(os.path.join("if_mangled_files"), exist_ok=True)
 
     # make filepaths
-    input_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", input_file)
-    expected_output_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", output_file)
+    input_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", input_file)
+    expected_output_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", output_file)
     actual_output_file_path = os.path.join("if_mangled_files", input_file)
-    mangle_ifs(input_file_path, input_file)
+    mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.'})
 
     # check every line
     with open(expected_output_file_path, "r") as expected_read_file:

From ef3fd584a21e2e417363a5f083bcf94261739ceb Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 13:54:19 +0200
Subject: [PATCH 103/152] relocate full test files

---
 .../test_paragraph_split_1.md                 |  0
 .../test_title_split_1.md                     |  0
 .../if_mangler_1_input.md                     |  4 --
 .../if_mangler_1_output.md                    |  4 --
 .../if_mangler_2_input.md                     |  7 ---
 .../if_mangler_2_output.md                    |  7 ---
 .../if_mangler_3_input.md                     |  6 --
 .../if_mangler_3_output.md                    |  6 --
 .../if_mangler_4_input.md                     |  4 --
 .../if_mangler_4_output.md                    |  4 --
 .../if_mangler_5_input.md                     | 11 ----
 .../if_mangler_5_output.md                    | 11 ----
 .../if_mangler_6_input.md                     |  8 ---
 .../if_mangler_6_output.md                    |  8 ---
 .../if_mangler_7_input.md                     |  9 ---
 .../if_mangler_7_output.md                    |  9 ---
 .../if_mangler_test_files/if_mangler_input.md | 55 -------------------
 .../if_mangler_output.md                      | 55 -------------------
 18 files changed, 208 deletions(-)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/{ => full_test_paragraph_split}/test_paragraph_split_1.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/{ => full_test_title_split}/test_title_split_1.md (100%)
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md
deleted file mode 100644
index 6a74b3c0181..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md
+++ /dev/null
@@ -1,4 +0,0 @@
-test1: OS_IF
-{% if OS == windows %}
-test1
-{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md
deleted file mode 100644
index 2f9cdc38294..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md
+++ /dev/null
@@ -1,4 +0,0 @@
-test1: OS_IF
-{-if-% if OS == windows %-if-}
-test1
-{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md
deleted file mode 100644
index 360a4a59ba3..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md
+++ /dev/null
@@ -1,7 +0,0 @@
-test2: OS_IF in NON_OS_IF
-{% if site == Gent %}
-test2
-{% if OS == windows %}
-test2
-{% endif %}
-{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md
deleted file mode 100644
index 798dcf6db24..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md
+++ /dev/null
@@ -1,7 +0,0 @@
-test2: OS_IF in NON_OS_IF
-{% if site == Gent %}
-test2
-{-if-% if OS == windows %-if-}
-test2
-{-if-% endif %-if-}
-{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md
deleted file mode 100644
index d93125a5971..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md
+++ /dev/null
@@ -1,6 +0,0 @@
-test3: OS_IF with else
-{% if OS == linux %}
-test3
-{% else %}
-test3
-{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md
deleted file mode 100644
index 02141961338..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md
+++ /dev/null
@@ -1,6 +0,0 @@
-test3: OS_IF with else
-{-if-% if OS == linux %-if-}
-test3
-{-if-% else %-if-}
-test3
-{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md
deleted file mode 100644
index cc15fae1df1..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md
+++ /dev/null
@@ -1,4 +0,0 @@
-test4: OS_IF with wrong syntax
-{ if OS == macos }
-test4
-{ endif }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md
deleted file mode 100644
index cc15fae1df1..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md
+++ /dev/null
@@ -1,4 +0,0 @@
-test4: OS_IF with wrong syntax
-{ if OS == macos }
-test4
-{ endif }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md
deleted file mode 100644
index bdb288474e2..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md
+++ /dev/null
@@ -1,11 +0,0 @@
-test5: OS_IF in OS_IF
-{% if OS == windows %}
-test5
-{% else %}
-{% if OS == linux %}
-test5
-{% else %}
-test5
-{% endif %}
-test5
-{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md
deleted file mode 100644
index 10443eb67a4..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md
+++ /dev/null
@@ -1,11 +0,0 @@
-test5: OS_IF in OS_IF
-{-if-% if OS == windows %-if-}
-test5
-{-if-% else %-if-}
-{-if-% if OS == linux %-if-}
-test5
-{-if-% else %-if-}
-test5
-{-if-% endif %-if-}
-test5
-{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md
deleted file mode 100644
index 0731ee3588c..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md
+++ /dev/null
@@ -1,8 +0,0 @@
-test6: NON_OS_IF in OS_IF
-{% if OS == macos %}
-test6
-{% if site == Gent %}
-test6
-{% endif %}
-test6
-{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md
deleted file mode 100644
index cd37117cb00..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md
+++ /dev/null
@@ -1,8 +0,0 @@
-test6: NON_OS_IF in OS_IF
-{-if-% if OS == macos %-if-}
-test6
-{% if site == Gent %}
-test6
-{% endif %}
-test6
-{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md
deleted file mode 100644
index 6a72a338527..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md
+++ /dev/null
@@ -1,9 +0,0 @@
-test7: weird spacing and dashes
-	{%if OS == windows %}
-	test7
-{%- else%}
-	test7
-		{% if OS == linux%}
-test7
-	{%-endif %}
-{%endif%}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md
deleted file mode 100644
index dfe342ebfb1..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md
+++ /dev/null
@@ -1,9 +0,0 @@
-test7: weird spacing and dashes
-	{-if-%if OS == windows %-if-}
-	test7
-{-if-%- else%-if-}
-	test7
-		{-if-% if OS == linux%-if-}
-test7
-	{-if-%-endif %-if-}
-{-if-%endif%-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md
deleted file mode 100644
index fb8c1f8b539..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md
+++ /dev/null
@@ -1,55 +0,0 @@
-test1: OS_IF
-{% if OS == windows %}
-test1
-{% endif %}
-
-test2: OS_IF in NON_OS_IF
-{% if site == Gent %}
-test2
-{% if OS == windows %}
-test2
-{% endif %}
-{% endif %}
-
-test3: OS_IF with else
-{% if OS == linux %}
-test3
-{% else %}
-test3
-{% endif %}
-
-test4: OS_IF with wrong syntax
-{ if OS == macos }
-test4
-{ endif }
-
-test5: OS_IF in OS_IF
-{% if OS == windows %}
-test5
-{% else %}
-{% if OS == linux %}
-test5
-{% else %}
-test5
-{% endif %}
-test5
-{% endif %}
-
-test6: NON_OS_IF in OS_IF
-{% if OS == macos %}
-test6
-{% if site == Gent %}
-test6
-{% endif %}
-test6
-{% endif %}
-
-test7: weird spacing and dashes
-	{%if OS == windows %}
-	test7
-{%- else%}
-	test7
-		{% if OS == linux%}
-test7
-	{%-endif %}
-{%endif%}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md
deleted file mode 100644
index 796e94348fa..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md
+++ /dev/null
@@ -1,55 +0,0 @@
-test1: OS_IF
-{-if-% if OS == windows %-if-}
-test1
-{-if-% endif %-if-}
-
-test2: OS_IF in NON_OS_IF
-{% if site == Gent %}
-test2
-{-if-% if OS == windows %-if-}
-test2
-{-if-% endif %-if-}
-{% endif %}
-
-test3: OS_IF with else
-{-if-% if OS == linux %-if-}
-test3
-{-if-% else %-if-}
-test3
-{-if-% endif %-if-}
-
-test4: OS_IF with wrong syntax
-{ if OS == macos }
-test4
-{ endif }
-
-test5: OS_IF in OS_IF
-{-if-% if OS == windows %-if-}
-test5
-{-if-% else %-if-}
-{-if-% if OS == linux %-if-}
-test5
-{-if-% else %-if-}
-test5
-{-if-% endif %-if-}
-test5
-{-if-% endif %-if-}
-
-test6: NON_OS_IF in OS_IF
-{-if-% if OS == macos %-if-}
-test6
-{% if site == Gent %}
-test6
-{% endif %}
-test6
-{-if-% endif %-if-}
-
-test7: weird spacing and dashes
-	{-if-%if OS == windows %-if-}
-	test7
-{-if-%- else%-if-}
-	test7
-		{-if-% if OS == linux%-if-}
-test7
-	{-if-%-endif %-if-}
-{-if-%endif%-if-}
\ No newline at end of file

From 4d7db8f889decbcf157ef08c55912c3e269ef382 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 13:59:49 +0200
Subject: [PATCH 104/152] Revert "update arguments of if mangler"

This reverts commit dd64381efc3b7156c2905f69aec54572b2ca2c53.
---
 scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
index 4d0dd876103..17053fe705c 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
@@ -18,10 +18,10 @@ def test_if_mangler(input_file, output_file):
     os.makedirs(os.path.join("if_mangled_files"), exist_ok=True)
 
     # make filepaths
-    input_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", input_file)
-    expected_output_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", output_file)
+    input_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", input_file)
+    expected_output_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", output_file)
     actual_output_file_path = os.path.join("if_mangled_files", input_file)
-    mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.'})
+    mangle_ifs(input_file_path, input_file)
 
     # check every line
     with open(expected_output_file_path, "r") as expected_read_file:

From df9bac5031138324895fa70b6d16d82c8fa2e164 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 14:02:12 +0200
Subject: [PATCH 105/152] Revert "relocate full test files"

This reverts commit ef3fd584a21e2e417363a5f083bcf94261739ceb.
---
 .../if_mangler_1_input.md                     |  4 ++
 .../if_mangler_1_output.md                    |  4 ++
 .../if_mangler_2_input.md                     |  7 +++
 .../if_mangler_2_output.md                    |  7 +++
 .../if_mangler_3_input.md                     |  6 ++
 .../if_mangler_3_output.md                    |  6 ++
 .../if_mangler_4_input.md                     |  4 ++
 .../if_mangler_4_output.md                    |  4 ++
 .../if_mangler_5_input.md                     | 11 ++++
 .../if_mangler_5_output.md                    | 11 ++++
 .../if_mangler_6_input.md                     |  8 +++
 .../if_mangler_6_output.md                    |  8 +++
 .../if_mangler_7_input.md                     |  9 +++
 .../if_mangler_7_output.md                    |  9 +++
 .../if_mangler_test_files/if_mangler_input.md | 55 +++++++++++++++++++
 .../if_mangler_output.md                      | 55 +++++++++++++++++++
 .../test_paragraph_split_1.md                 |  0
 .../test_title_split_1.md                     |  0
 18 files changed, 208 insertions(+)
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/{full_test_paragraph_split => }/test_paragraph_split_1.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/{full_test_title_split => }/test_title_split_1.md (100%)

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md
new file mode 100644
index 00000000000..6a74b3c0181
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md
@@ -0,0 +1,4 @@
+test1: OS_IF
+{% if OS == windows %}
+test1
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md
new file mode 100644
index 00000000000..2f9cdc38294
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md
@@ -0,0 +1,4 @@
+test1: OS_IF
+{-if-% if OS == windows %-if-}
+test1
+{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md
new file mode 100644
index 00000000000..360a4a59ba3
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md
@@ -0,0 +1,7 @@
+test2: OS_IF in NON_OS_IF
+{% if site == Gent %}
+test2
+{% if OS == windows %}
+test2
+{% endif %}
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md
new file mode 100644
index 00000000000..798dcf6db24
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md
@@ -0,0 +1,7 @@
+test2: OS_IF in NON_OS_IF
+{% if site == Gent %}
+test2
+{-if-% if OS == windows %-if-}
+test2
+{-if-% endif %-if-}
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md
new file mode 100644
index 00000000000..d93125a5971
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md
@@ -0,0 +1,6 @@
+test3: OS_IF with else
+{% if OS == linux %}
+test3
+{% else %}
+test3
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md
new file mode 100644
index 00000000000..02141961338
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md
@@ -0,0 +1,6 @@
+test3: OS_IF with else
+{-if-% if OS == linux %-if-}
+test3
+{-if-% else %-if-}
+test3
+{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md
new file mode 100644
index 00000000000..cc15fae1df1
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md
@@ -0,0 +1,4 @@
+test4: OS_IF with wrong syntax
+{ if OS == macos }
+test4
+{ endif }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md
new file mode 100644
index 00000000000..cc15fae1df1
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md
@@ -0,0 +1,4 @@
+test4: OS_IF with wrong syntax
+{ if OS == macos }
+test4
+{ endif }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md
new file mode 100644
index 00000000000..bdb288474e2
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md
@@ -0,0 +1,11 @@
+test5: OS_IF in OS_IF
+{% if OS == windows %}
+test5
+{% else %}
+{% if OS == linux %}
+test5
+{% else %}
+test5
+{% endif %}
+test5
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md
new file mode 100644
index 00000000000..10443eb67a4
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md
@@ -0,0 +1,11 @@
+test5: OS_IF in OS_IF
+{-if-% if OS == windows %-if-}
+test5
+{-if-% else %-if-}
+{-if-% if OS == linux %-if-}
+test5
+{-if-% else %-if-}
+test5
+{-if-% endif %-if-}
+test5
+{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md
new file mode 100644
index 00000000000..0731ee3588c
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md
@@ -0,0 +1,8 @@
+test6: NON_OS_IF in OS_IF
+{% if OS == macos %}
+test6
+{% if site == Gent %}
+test6
+{% endif %}
+test6
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md
new file mode 100644
index 00000000000..cd37117cb00
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md
@@ -0,0 +1,8 @@
+test6: NON_OS_IF in OS_IF
+{-if-% if OS == macos %-if-}
+test6
+{% if site == Gent %}
+test6
+{% endif %}
+test6
+{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md
new file mode 100644
index 00000000000..6a72a338527
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md
@@ -0,0 +1,9 @@
+test7: weird spacing and dashes
+	{%if OS == windows %}
+	test7
+{%- else%}
+	test7
+		{% if OS == linux%}
+test7
+	{%-endif %}
+{%endif%}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md
new file mode 100644
index 00000000000..dfe342ebfb1
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md
@@ -0,0 +1,9 @@
+test7: weird spacing and dashes
+	{-if-%if OS == windows %-if-}
+	test7
+{-if-%- else%-if-}
+	test7
+		{-if-% if OS == linux%-if-}
+test7
+	{-if-%-endif %-if-}
+{-if-%endif%-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md
new file mode 100644
index 00000000000..fb8c1f8b539
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md
@@ -0,0 +1,55 @@
+test1: OS_IF
+{% if OS == windows %}
+test1
+{% endif %}
+
+test2: OS_IF in NON_OS_IF
+{% if site == Gent %}
+test2
+{% if OS == windows %}
+test2
+{% endif %}
+{% endif %}
+
+test3: OS_IF with else
+{% if OS == linux %}
+test3
+{% else %}
+test3
+{% endif %}
+
+test4: OS_IF with wrong syntax
+{ if OS == macos }
+test4
+{ endif }
+
+test5: OS_IF in OS_IF
+{% if OS == windows %}
+test5
+{% else %}
+{% if OS == linux %}
+test5
+{% else %}
+test5
+{% endif %}
+test5
+{% endif %}
+
+test6: NON_OS_IF in OS_IF
+{% if OS == macos %}
+test6
+{% if site == Gent %}
+test6
+{% endif %}
+test6
+{% endif %}
+
+test7: weird spacing and dashes
+	{%if OS == windows %}
+	test7
+{%- else%}
+	test7
+		{% if OS == linux%}
+test7
+	{%-endif %}
+{%endif%}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md
new file mode 100644
index 00000000000..796e94348fa
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md
@@ -0,0 +1,55 @@
+test1: OS_IF
+{-if-% if OS == windows %-if-}
+test1
+{-if-% endif %-if-}
+
+test2: OS_IF in NON_OS_IF
+{% if site == Gent %}
+test2
+{-if-% if OS == windows %-if-}
+test2
+{-if-% endif %-if-}
+{% endif %}
+
+test3: OS_IF with else
+{-if-% if OS == linux %-if-}
+test3
+{-if-% else %-if-}
+test3
+{-if-% endif %-if-}
+
+test4: OS_IF with wrong syntax
+{ if OS == macos }
+test4
+{ endif }
+
+test5: OS_IF in OS_IF
+{-if-% if OS == windows %-if-}
+test5
+{-if-% else %-if-}
+{-if-% if OS == linux %-if-}
+test5
+{-if-% else %-if-}
+test5
+{-if-% endif %-if-}
+test5
+{-if-% endif %-if-}
+
+test6: NON_OS_IF in OS_IF
+{-if-% if OS == macos %-if-}
+test6
+{% if site == Gent %}
+test6
+{% endif %}
+test6
+{-if-% endif %-if-}
+
+test7: weird spacing and dashes
+	{-if-%if OS == windows %-if-}
+	test7
+{-if-%- else%-if-}
+	test7
+		{-if-% if OS == linux%-if-}
+test7
+	{-if-%-endif %-if-}
+{-if-%endif%-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md

From 631d9e9c26945359eb25ce08a37cd424061c2407 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 14:08:22 +0200
Subject: [PATCH 106/152] update test to adapt to new arguments in if mangler

---
 scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
index 17053fe705c..4d0dd876103 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
@@ -18,10 +18,10 @@ def test_if_mangler(input_file, output_file):
     os.makedirs(os.path.join("if_mangled_files"), exist_ok=True)
 
     # make filepaths
-    input_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", input_file)
-    expected_output_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", output_file)
+    input_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", input_file)
+    expected_output_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", output_file)
     actual_output_file_path = os.path.join("if_mangled_files", input_file)
-    mangle_ifs(input_file_path, input_file)
+    mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.'})
 
     # check every line
     with open(expected_output_file_path, "r") as expected_read_file:

From c6e600dcbdf9885b41cd8cbd07917a92d2b423a6 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 14:15:25 +0200
Subject: [PATCH 107/152] relocated full test files

---
 .../{ => full_test_paragraph_split}/test_paragraph_split_1.md     | 0
 .../test_files/{ => full_test_title_split}/test_title_split_1.md  | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/{ => full_test_paragraph_split}/test_paragraph_split_1.md (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/{ => full_test_title_split}/test_title_split_1.md (100%)

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md

From d1c6194e8aa75301f7e3b1a2396eb13538de063d Mon Sep 17 00:00:00 2001
From: EwDa291 <100782488+EwDa291@users.noreply.github.com>
Date: Mon, 26 Aug 2024 14:17:42 +0200
Subject: [PATCH 108/152] Rename test_paragraph_split_1.md to
 test_paragraph_split_1_input.md

---
 .../full_test_paragraph_split/test_paragraph_split_1.md          | 0
 .../full_test_paragraph_split/test_paragraph_split_1_input.md    | 1 +
 2 files changed, 1 insertion(+)
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md
new file mode 100644
index 00000000000..d3f5a12faa9
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md
@@ -0,0 +1 @@
+

From 695ffd635a61ff44514232a1b37f55198100f0bd Mon Sep 17 00:00:00 2001
From: EwDa291 <100782488+EwDa291@users.noreply.github.com>
Date: Mon, 26 Aug 2024 14:18:00 +0200
Subject: [PATCH 109/152] Rename test_title_split_1.md to
 test_title_split_1_input.md

---
 .../{test_title_split_1.md => test_title_split_1_input.md}      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/{test_title_split_1.md => test_title_split_1_input.md} (98%)

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md
similarity index 98%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md
index 9b810c3f41a..5065852e2a1 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md
@@ -28,4 +28,4 @@ blablabla generic with a [link](generic.md)
 
 ## Subtitle 5 generic
 
-blablabla
\ No newline at end of file
+blablabla

From af4832b5a8cd50bc790353a232fcca5e51e35e90 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 15:08:37 +0200
Subject: [PATCH 110/152] small fix

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 26cf15b79a2..db2c5e84257 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -825,7 +825,8 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
 
         # add first subtitle in front of section again
         if len(jinja_text) != 0:
-            jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + jinja_text
+            if options[SPLIT_ON_TITLES]:
+                jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + jinja_text
 
             # re-adjust text to correct overcorrections
             jinja_text = re.sub('"' + OS + '"', OS, jinja_text)
@@ -951,7 +952,7 @@ def main(options):
 
         # create directories for the source markdown file
         for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]:
-            os.makedirs(os.path.join(options[DESTINATION_DIRECTORY], directory), exist_ok=True)
+            os.makedirs(directory, exist_ok=True)
 
         # process the jinja macros
         jinja_parser(filename, copy_file, options)

From 8805c8c01b6efe814fae4bba5f4b05f9e9d8beb2 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 15:11:52 +0200
Subject: [PATCH 111/152] test text for paragraph split

---
 .../test_paragraph_split_1_input.md           | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md
index d3f5a12faa9..44ac82c795d 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md
@@ -1 +1,43 @@
+# Main title
 
+This is the first paragraph of text. It is non-os-specific, however it does contain [a link](generic.md).
+It also contains some `other` *Markdown* _syntax_ and an
+```shell
+example code block.
+```
+This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum
+character limit for a section).
+
+## OS specific sections
+
+This is the second section, it is the start of some {% if OS == windows %} text specific to windows.
+In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer 
+to make sure we get a long section that is over the minimum required length for the next newline character to be 
+classified as the end of this section. I am doing this because for the next sections I want to test whether they will be
+grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, 
+let's add [a link](windows.md) in this section as well.
+
+### Windows specific section
+
+Like this.
+
+And this.
+
+And also this.
+
+These section should all be grouped together under the windows specific section of the output. The addition of this long
+section at the end should make sure the combination of sections comes to an end here.
+{% else %}
+text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will
+still add [a link](linuxmacos.md).
+
+### Non Windows section
+
+Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise
+section that ends right here.
+{% endif %}
+
+## Conclusion
+
+Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I 
+might add to this if needed.

From a265ffd87121d3d195670cc76f8d94b4b8bcc009 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Mon, 26 Aug 2024 16:52:20 +0200
Subject: [PATCH 112/152] start of a fix for double title problem, not done yet

---
 .../chatbot_parser.py                         | 22 +++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index db2c5e84257..c0b91319912 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -401,11 +401,13 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1,
     # metadata title
     metadata_title = main_title
 
+    # TODO: define metadata data if split occurs on paragraphs and last_title and title_level are known (placeholder in place right now)
+    if current_paragraph_number != -1:
+        last_title_level = 5
+        last_dir = "PLACEHOLDER"
+
     # list to keep track of most recent directories on each title level
-    if LINUX_TUTORIAL not in file:
-        curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)]
-    else:
-        curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(options[MAX_TITLE_DEPTH] + 1)]
+    curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)]
 
     with open(file, 'r') as readfile:
 
@@ -885,7 +887,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
             pass
 
 
-def main(options):
+def main(options, verbose=True):
     """
     main function
 
@@ -896,10 +898,11 @@ def main(options):
                     MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text,
                     INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext,
                     DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not}
+    :param verbose: boolean indicating whether the print statements in the main function should be executed; disabling them is only intended for testing
     :return:
     """
 
-    if options[DEEP_DIRECTORIES]:
+    if options[DEEP_DIRECTORIES] and verbose:
         print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
 
     # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason
@@ -915,7 +918,7 @@ def main(options):
 
     ################### define loop-invariant variables ###################
 
-    # constant that keeps track of the source directories
+    # constant that keeps track of the source directory
     source_directory = options[SOURCE_DIRECTORY]
 
     # list of all the filenames
@@ -952,7 +955,7 @@ def main(options):
 
         # create directories for the source markdown file
         for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]:
-            os.makedirs(directory, exist_ok=True)
+            os.makedirs(os.path.join(options[DESTINATION_DIRECTORY], directory), exist_ok=True)
 
         # process the jinja macros
         jinja_parser(filename, copy_file, options)
@@ -978,7 +981,8 @@ def main(options):
     if os.path.exists(TEMP_JINJA_FILE):
         os.remove(TEMP_JINJA_FILE)
 
-    print("Parsing finished successfully")
+    if verbose:
+        print("Parsing finished successfully")
 
 
 ################### run the script ###################

From 6c2a61c25215cf3d5c942c6c2de7804baf725584 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 09:51:47 +0200
Subject: [PATCH 113/152] Fix for double title bug when splitting on paragraph

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index c0b91319912..72ebbcee3ab 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -825,10 +825,11 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
         template = Template(text)
         jinja_text = template.render(OS=OS)
 
-        # add first subtitle in front of section again
         if len(jinja_text) != 0:
-            if options[SPLIT_ON_TITLES]:
-                jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + jinja_text
+
+            # add first subtitle in front of section again
+            if options[SPLIT_ON_TITLES] or metadata[SUBTITLE].replace("-", " ") not in jinja_text[:len(metadata[SUBTITLE]) + 1]:
+                jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE].replace("-", " ") + "\n" + jinja_text
 
             # re-adjust text to correct overcorrections
             jinja_text = re.sub('"' + OS + '"', OS, jinja_text)

From ed088794e1b6ceb5b805c87a82bcd31df6931299 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 10:05:56 +0200
Subject: [PATCH 114/152] Fix bug for empty linklist in metadata

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 72ebbcee3ab..dfa2972b9fc 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -741,6 +741,10 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
             writefile.write(text)
 
     # write metadata
+    # drop the links entry from the metadata if it is empty
+    if LINKS in metadata.keys() and len(metadata[LINKS].keys()) == 0:
+        del metadata[LINKS]
+
     # add previous subtitle
     if title_order_number != 0:
         metadata[PREVIOUS_SUBTITLE] = title_order[title_order_number - 1]
@@ -830,6 +834,8 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
             # add first subtitle in front of section again
             if options[SPLIT_ON_TITLES] or metadata[SUBTITLE].replace("-", " ") not in jinja_text[:len(metadata[SUBTITLE]) + 1]:
                 jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE].replace("-", " ") + "\n" + jinja_text
+            else:
+                jinja_text = "#" * metadata[TITLE_DEPTH] + " " + jinja_text
 
             # re-adjust text to correct overcorrections
             jinja_text = re.sub('"' + OS + '"', OS, jinja_text)

From 176af130ab9837f3d28511bcf113aeb38bed1c9b Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 10:33:17 +0200
Subject: [PATCH 115/152] fix bug where too many directories were sometimes
 created

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index dfa2972b9fc..b0bacbbca17 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -962,7 +962,7 @@ def main(options, verbose=True):
 
         # create directories for the source markdown file
         for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]:
-            os.makedirs(os.path.join(options[DESTINATION_DIRECTORY], directory), exist_ok=True)
+            os.makedirs(directory, exist_ok=True)
 
         # process the jinja macros
         jinja_parser(filename, copy_file, options)

From d4ceac8962b2bf61def602b5dad3ecfc7d12bc1e Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 10:41:49 +0200
Subject: [PATCH 116/152] test of full script, test files not ready to be
 pushed yet

---
 .../tests/test_full_script.py                 | 66 +++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_full_script.py

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py
new file mode 100644
index 00000000000..61a6f3f1bdf
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py
@@ -0,0 +1,66 @@
+import pytest
+import os
+import shutil
+from chatbot_parser import main
+
+
+@pytest.mark.parametrize("input_directory,actual_output_directory,expected_output_directory, options", [
+    ("tests/test_files/ftps", "tests/test_files/ftps/actual",
+     "tests/test_files/ftps/output",
+     {"SOURCE_DIRECTORY": "tests/test_files/ftps",
+      "DESTINATION_DIRECTORY": "tests/test_files/ftps/actual",
+      "SPLIT_ON_TITLES": False,
+      "SPLIT_ON_PARAGRAPHS": True,
+      "MIN_PARAGRAPH_LENGTH": 160,
+      "MAX_TITLE_DEPTH": 4,
+      "INCLUDE_LINKS_IN_PLAINTEXT": False,
+      "DEEP_DIRECTORIES": False}
+     ),
+    ("tests/test_files/ftts", "tests/test_files/ftts/actual",
+     "tests/test_files/ftts/output",
+     {"SOURCE_DIRECTORY": "tests/test_files/ftts",
+      "DESTINATION_DIRECTORY": "tests/test_files/ftts/actual",
+      "SPLIT_ON_TITLES": True,
+      "SPLIT_ON_PARAGRAPHS": False,
+      "MIN_PARAGRAPH_LENGTH": 160,
+      "MAX_TITLE_DEPTH": 4,
+      "INCLUDE_LINKS_IN_PLAINTEXT": False,
+      "DEEP_DIRECTORIES": True}
+     )
+])
+def test_full_script_generated_directories(input_directory, actual_output_directory, expected_output_directory, options):
+    # run the script
+    main(options, verbose=False)
+
+    # Compare directories and files
+    for dirpath, dirnames, filenames in os.walk(expected_output_directory):
+        relative_path = os.path.relpath(dirpath, expected_output_directory)
+        actual_dir = os.path.join(actual_output_directory, relative_path)
+
+        # Check if the directory exists
+        assert os.path.isdir(actual_dir), f"Directory '{actual_dir}' is missing."
+
+        # Check for files
+        for filename in filenames:
+            ref_file = os.path.join(dirpath, filename)
+            gen_file = os.path.join(actual_dir, filename)
+
+            # Check if the file exists
+            assert os.path.isfile(gen_file), f"File '{gen_file}' is missing."
+
+            # Check file content
+            with open(ref_file, 'r') as ref_f, open(gen_file, 'r') as gen_f:
+                ref_content = ref_f.read().strip()
+                gen_content = gen_f.read().strip()
+                assert ref_content == gen_content, f"Content of file '{gen_file}' does not match."
+
+    # check that not too many directories have been generated
+    for dirpath, dirnames, filenames in os.walk(actual_output_directory):
+        relative_path = os.path.relpath(dirpath, actual_output_directory)
+        expected_dir = os.path.join(expected_output_directory, relative_path)
+
+        # Check if the directory exists
+        assert os.path.isdir(expected_dir), f"Directory '{relative_path}' was made, but shouldn't have been."
+
+    # remove directory
+    shutil.rmtree(actual_output_directory, ignore_errors=True)

From 815a863fc83f37bfa49976ca14ce23e63e3fafa4 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 12:18:12 +0200
Subject: [PATCH 117/152] updated requirements.txt

---
 scripts/HPC_chatbot_preprocessor/requirements.txt | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt
index 907f08fda77..4d27d462460 100644
--- a/scripts/HPC_chatbot_preprocessor/requirements.txt
+++ b/scripts/HPC_chatbot_preprocessor/requirements.txt
@@ -1,7 +1,2 @@
-os
-re
-shutil
-pypandoc
-yaml
-jinja2
-pathlib
\ No newline at end of file
+PyYAML==6.0.2
+Jinja2==3.1.4
\ No newline at end of file

From d15469f420a86edeabda1472497c38206b53351d Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 12:19:05 +0200
Subject: [PATCH 118/152] updated docstring in main function

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index b0bacbbca17..698278da90d 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -899,7 +899,9 @@ def main(options, verbose=True):
     main function
 
     :param options: dictionary containing the options specified by the user to run the script:
-                    {SPLIT_ON_TITLES: boolean indicating whether to split on titles,
+                    {SOURCE_DIRECTORY: The source directory where the original files are located,
+                    DESTINATION_DIRECTORY: The destination directory where the processed files should be written to,
+                    SPLIT_ON_TITLES: boolean indicating whether to split on titles,
                     SPLIT_ON_PARAGRAPHS: boolean indicating whether to split on paragraphs (should always be the opposite of SPLIT_ON_TITLES),
                     MIN_PARAGRAPH_LENGTH: integer representing the minimum length of a paragraph,
                     MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text,

From daa6b36e07854f1b41b5907339bf283218d93a2c Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 12:32:09 +0200
Subject: [PATCH 119/152] add support for comments for the bot to be included
 in the source files

---
 scripts/HPC_chatbot_preprocessor/README.md         |  8 +++++++-
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 11 +++++++++--
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index bc2922aaf5a..2cb30bdc985 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -149,7 +149,7 @@ This will also result in the parser "forgetting" it opened an os-specific if-sta
 
 The input shouldn't contain any html syntax. While some failsafes are in place, the script isn't made with the use case of handling html syntax in mind. 
 
-### Markdown comments
+### Comments
 
 Any comments within the markdown files (for example TODO's) should follow the following syntax:
 
@@ -158,6 +158,12 @@ Any comments within the markdown files (for example TODO's) should follow the fo
 ```
  and should be limited to one line.
 
+Comments can be written in such a way that the script will keep them as input for the bot. To do that, put the marker `INPUT_FOR_BOT` directly in front of the content of the comment, as shown below:
+
+```
+<!--INPUT_FOR_BOTyour comment for the bot-->
+```
+
 ### Long filepaths
 
 Due to the nature of this script, it can generate large directories with very long names if `deep_directories` is enabled. Depending on the operating system, this can cause problems with filepaths being to long, resulting in files not being able to open. A possible fix for this is to make sure the filepath to where the script is located is not too long. Another solution is lowering the `max_title_depth` or disabling `deep_directories`.
diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 698278da90d..338cdef32f5 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -83,7 +83,7 @@
 ELSE = "else"
 ENDIF = "endif"
 
-# link indicators
+# link indicator
 LINK_MARKER = r'§link§link§'
 
 # HTML tags
@@ -101,6 +101,9 @@
 _PARAGRAPH_ = "_paragraph_"
 METADATA_EXTENSION = "_metadata"
 
+# Marker for comments for the bot
+INPUT_FOR_BOT = "INPUT_FOR_BOT"
+
 
 ################### define functions ###################
 
@@ -184,7 +187,11 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
             elif any(re.match(pattern, content) for pattern in html_tags_style):
                 curr_line = re.sub(r'<.*?>', "", curr_line)
 
-            # drop markdown comments
+            # keep comments for bot
+            elif re.fullmatch(r'!--' + INPUT_FOR_BOT + r'.*?--', content):
+                curr_line = re.sub(r'<!--' + INPUT_FOR_BOT + r'(.*?)-->', lambda m: m.group(1), curr_line)
+
+            # drop comments
             elif re.fullmatch(r'!--.*?--', content):
                 curr_line = re.sub(r'<.*?>', "", curr_line)
 

From 4c19f442e2e4f6af1f2448e26cf0b1b29e4522ac Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 13:30:01 +0200
Subject: [PATCH 120/152] changed the default for min paragraph length

---
 scripts/HPC_chatbot_preprocessor/README.md         | 2 +-
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index 2cb30bdc985..b3bce665973 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -36,7 +36,7 @@ Including this option will split the source files based on the titles and subtit
 
 #### `pl`/`min_paragraph_length`
 
-This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 160 characters. This options only works if `split_on_titles` is not enabled.
+This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 683 characters. This option only works if `split_on_titles` is not enabled.
 
 #### `td`/`max_title_depth`
 
diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 338cdef32f5..a041160c855 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -1009,7 +1009,7 @@ def main(options, verbose=True):
     parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located")
     parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to")
     parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.")
-    parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is disabled (default: 160)")
+    parser.add_argument("-pl", "--min_paragraph_length", type=int, default=683, help="Minimum length in characters of a paragraph, only works if split on titles is disabled (default: 683)")
     parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)")
     parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts")
     parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is enabled")

From 9a6ff5814422fc2ea0d4a128407302572d964105 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 13:31:40 +0200
Subject: [PATCH 121/152] added test files for full script test

---
 .../generic/tps1/tps1_paragraph_1.txt         |  6 ++
 .../tps1/tps1_paragraph_1_metadata.json       | 14 +++
 .../generic/tps1/tps1_paragraph_3.txt         |  3 +
 .../tps1/tps1_paragraph_3_metadata.json       | 11 +++
 .../linux/tps1/tps1_linux_paragraph_2.1.txt   |  4 +
 .../tps1_linux_paragraph_2.1_metadata.json    | 14 +++
 .../linux/tps1/tps1_linux_paragraph_2.2.txt   |  3 +
 .../tps1_linux_paragraph_2.2_metadata.json    | 11 +++
 .../macos/tps1/tps1_macos_paragraph_2.1.txt   |  4 +
 .../tps1_macos_paragraph_2.1_metadata.json    | 14 +++
 .../macos/tps1/tps1_macos_paragraph_2.2.txt   |  3 +
 .../tps1_macos_paragraph_2.2_metadata.json    | 11 +++
 .../tps1/tps1_windows_paragraph_2.1.txt       |  7 ++
 .../tps1_windows_paragraph_2.1_metadata.json  | 14 +++
 .../tps1/tps1_windows_paragraph_2.2.txt       |  6 ++
 .../tps1_windows_paragraph_2.2_metadata.json  | 11 +++
 .../tps1.md}                                  | 86 +++++++++----------
 .../tts1/Main-title/Subtitle-1/Subtitle-1.txt |  2 +
 .../Subtitle-1/Subtitle-1_metadata.json       | 11 +++
 .../Main-title/Subtitle-5-g/Subtitle-5-g.txt  |  1 +
 .../Subtitle-5-g/Subtitle-5-g_metadata.json   | 11 +++
 .../Main-title/Subtitle-2-g/Subtitle-2-g.txt  |  4 +
 .../Subtitle-2-g/Subtitle-2-g_metadata.json   | 14 +++
 .../Subtitle-4-l&m/Subtitle-4-l&m.txt         |  3 +
 .../Subtitle-4-l&m_metadata.json              | 14 +++
 .../Main-title/Subtitle-2-g/Subtitle-2-g.txt  |  4 +
 .../Subtitle-2-g/Subtitle-2-g_metadata.json   | 14 +++
 .../Subtitle-4-l&m/Subtitle-4-l&m.txt         |  3 +
 .../Subtitle-4-l&m_metadata.json              | 14 +++
 .../Main-title/Subtitle-2-g/Subtitle-2-g.txt  |  4 +
 .../Subtitle-2-g/Subtitle-2-g_metadata.json   | 14 +++
 .../Subtitle-3-w/Subtitle-3-w.txt             |  3 +
 .../Subtitle-3-w/Subtitle-3-w_metadata.json   | 14 +++
 .../tts1.md}                                  |  8 +-
 34 files changed, 313 insertions(+), 47 deletions(-)
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/{full_test_paragraph_split/test_paragraph_split_1_input.md => ftps/tps1.md} (97%)
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/{full_test_title_split/test_title_split_1_input.md => ftts/tts1.md} (76%)

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt
new file mode 100644
index 00000000000..94270ff37e3
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt
@@ -0,0 +1,6 @@
+Main title
+This is the first paragraph of text. It is non-os-specific, however it does contain a link.
+It also contains some other Markdown syntax and an
+example code block.
+This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum
+character limit for a section).
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json
new file mode 100644
index 00000000000..19e44fad91d
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "tps1",
+    "subtitle": "Main-title",
+    "title_depth": 1,
+    "directory": "tps1",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/generic"
+    },
+    "parent_title": "",
+    "previous_title": null,
+    "next_title": "tps1_paragraph_2",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/tps1/#main-title"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt
new file mode 100644
index 00000000000..58eedc06aa0
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt
@@ -0,0 +1,3 @@
+Conclusion
+Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I 
+might add to this if needed.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json
new file mode 100644
index 00000000000..b4c98ff6465
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "tps1",
+    "subtitle": "Conclusion",
+    "title_depth": 2,
+    "directory": "tps1",
+    "parent_title": "",
+    "previous_title": "tps1_paragraph_2",
+    "next_title": null,
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/tps1/#conclusion"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt
new file mode 100644
index 00000000000..d0ee9ce8256
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt
@@ -0,0 +1,4 @@
+OS specific sections
+This is the second section, it is the start of some 
+text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will
+still add a link.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json
new file mode 100644
index 00000000000..bac81ed87e3
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "tps1",
+    "subtitle": "OS-specific-sections",
+    "title_depth": 2,
+    "directory": "tps1",
+    "parent_title": "Main-title",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/linuxmacos"
+    },
+    "previous_title": "tps1_paragraph_1",
+    "next_title": "tps1_linux_paragraph_2.2",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#os-specific-sections"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt
new file mode 100644
index 00000000000..1a3867e69fa
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt
@@ -0,0 +1,3 @@
+Non Windows section
+Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise
+section that ends right here.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json
new file mode 100644
index 00000000000..522265436ab
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "tps1",
+    "subtitle": "Non-Windows-section",
+    "title_depth": 3,
+    "directory": "tps1",
+    "parent_title": "OS-specific-sections",
+    "previous_title": "tps1_linux_paragraph_2.1",
+    "next_title": "tps1_paragraph_3",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#non-windows-section"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt
new file mode 100644
index 00000000000..e0642d6ac96
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt
@@ -0,0 +1,4 @@
+OS specific sections
+This is the second section, it is the start of some 
+text specific to OSes that aren't "windows". I feel like there is no need to make this section very long, however I will
+still add a link.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json
new file mode 100644
index 00000000000..5d9ec163f99
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "tps1",
+    "subtitle": "OS-specific-sections",
+    "title_depth": 2,
+    "directory": "tps1",
+    "parent_title": "Main-title",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/linuxmacos"
+    },
+    "previous_title": "tps1_paragraph_1",
+    "next_title": "tps1_macos_paragraph_2.2",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#os-specific-sections"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt
new file mode 100644
index 00000000000..1a3867e69fa
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt
@@ -0,0 +1,3 @@
+Non Windows section
+Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise
+section that ends right here.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json
new file mode 100644
index 00000000000..7b06f06efdd
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "tps1",
+    "subtitle": "Non-Windows-section",
+    "title_depth": 3,
+    "directory": "tps1",
+    "parent_title": "OS-specific-sections",
+    "previous_title": "tps1_macos_paragraph_2.1",
+    "next_title": "tps1_paragraph_3",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#non-windows-section"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt
new file mode 100644
index 00000000000..9a9cbe1f3d2
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt
@@ -0,0 +1,7 @@
+OS specific sections
+This is the second section, it is the start of some  text specific to windows.
+In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer 
+to make sure we get a long section that is over the minimum required length for the next newline character to be 
+classified as the end of this section. I am doing this because for the next sections I want to test whether they will be
+grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, 
+let's add a link in this section as well.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json
new file mode 100644
index 00000000000..e8e50aa6c32
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "tps1",
+    "subtitle": "OS-specific-sections",
+    "title_depth": 2,
+    "directory": "tps1",
+    "parent_title": "Main-title",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/windows"
+    },
+    "previous_title": "tps1_paragraph_1",
+    "next_title": "tps1_windows_paragraph_2.2",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#os-specific-sections"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt
new file mode 100644
index 00000000000..6b57235f68f
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt
@@ -0,0 +1,6 @@
+Windows specific section
+Like this.
+And this.
+And also this.
+These section should all be grouped together under the windows specific section of the output. The addition of this long
+section at the end should make sure the combination of sections comes to an end here.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json
new file mode 100644
index 00000000000..84ea6ad53f9
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "tps1",
+    "subtitle": "Windows-specific-section",
+    "title_depth": 3,
+    "directory": "tps1",
+    "parent_title": "OS-specific-sections",
+    "previous_title": "tps1_windows_paragraph_2.1",
+    "next_title": "tps1_paragraph_3",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#windows-specific-section"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md
similarity index 97%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md
index 44ac82c795d..d9b10d0c524 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md
@@ -1,43 +1,43 @@
-# Main title
-
-This is the first paragraph of text. It is non-os-specific, however it does contain [a link](generic.md).
-It also contains some `other` *Markdown* _syntax_ and an
-```shell
-example code block.
-```
-This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum
-character limit for a section).
-
-## OS specific sections
-
-This is the second section, it is the start of some {% if OS == windows %} text specific to windows.
-In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer 
-to make sure we get a long section that is over the minimum required length for the next newline character to be 
-classified as the end of this section. I am doing this because for the next sections I want to test whether they will be
-grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, 
-let's add [a link](windows.md) in this section as well.
-
-### Windows specific section
-
-Like this.
-
-And this.
-
-And also this.
-
-These section should all be grouped together under the windows specific section of the output. The addition of this long
-section at the end should make sure the combination of sections comes to an end here.
-{% else %}
-text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will
-still add [a link](linuxmacos.md).
-
-### Non Windows section
-
-Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise
-section that ends right here.
-{% endif %}
-
-## Conclusion
-
-Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I 
-might add to this if needed.
+# Main title
+
+This is the first paragraph of text. It is non-os-specific, however it does contain [a link](generic.md).
+It also contains some `other` *Markdown* _syntax_ and an
+```shell
+example code block.
+```
+This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum
+character limit for a section).
+
+## OS specific sections
+
+This is the second section, it is the start of some {% if OS == windows %} text specific to windows.
+In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer 
+to make sure we get a long section that is over the minimum required length for the next newline character to be 
+classified as the end of this section. I am doing this because for the next sections I want to test whether they will be
+grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, 
+let's add [a link](windows.md) in this section as well.
+
+### Windows specific section
+
+Like this.
+
+And this.
+
+And also this.
+
+These section should all be grouped together under the windows specific section of the output. The addition of this long
+section at the end should make sure the combination of sections comes to an end here.
+{% else %}
+text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will
+still add [a link](linuxmacos.md).
+
+### Non Windows section
+
+Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise
+section that ends right here.
+{% endif %}
+
+## Conclusion
+
+Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I 
+might add to this if needed.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt
new file mode 100644
index 00000000000..f62a4f31fee
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt
@@ -0,0 +1,2 @@
+blablabla
+blablablabla
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json
new file mode 100644
index 00000000000..9fdbce652bf
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "tts1",
+    "subtitle": "Subtitle-1",
+    "title_depth": 2,
+    "directory": "tts1\\Main-title\\Subtitle-1",
+    "parent_title": "Main-title",
+    "previous_title": "Main-title",
+    "next_title": "Subtitle-2-g",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/tts1/#subtitle-1"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt
new file mode 100644
index 00000000000..bdf68551202
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt
@@ -0,0 +1 @@
+blablabla
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json
new file mode 100644
index 00000000000..b48bcaaa08c
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "tts1",
+    "subtitle": "Subtitle-5-g",
+    "title_depth": 2,
+    "directory": "tts1\\Main-title\\Subtitle-5-g",
+    "parent_title": "Main-title",
+    "previous_title": "Subtitle-2-g",
+    "next_title": null,
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/tts1/#subtitle-5-g"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
new file mode 100644
index 00000000000..48125d91679
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
@@ -0,0 +1,4 @@
+blablabla generic
+blablabla generic
+blablabla Linux macOS
+blablablabla Linux macOS with a link
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
new file mode 100644
index 00000000000..a2b68c8865e
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "tts1",
+    "subtitle": "Subtitle-2-g",
+    "title_depth": 2,
+    "directory": "tts1\\Main-title\\Subtitle-2-g",
+    "parent_title": "Main-title",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/linuxmacos"
+    },
+    "previous_title": "Subtitle-1",
+    "next_title": "Subtitle-4-l&m",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/tts1/#subtitle-2-g"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
new file mode 100644
index 00000000000..b221f26074b
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
@@ -0,0 +1,3 @@
+blablabla Linux macOS
+blablablabla Linux macOS
+blablabla generic with a link
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
new file mode 100644
index 00000000000..537541e2cb0
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "tts1",
+    "subtitle": "Subtitle-4-l&m",
+    "title_depth": 3,
+    "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m",
+    "parent_title": "Subtitle-2-g",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/generic"
+    },
+    "previous_title": "Subtitle-2-g",
+    "next_title": "Subtitle-5-g",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/tts1/#subtitle-4-lm"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
new file mode 100644
index 00000000000..48125d91679
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
@@ -0,0 +1,4 @@
+blablabla generic
+blablabla generic
+blablabla Linux macOS
+blablablabla Linux macOS with a link
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
new file mode 100644
index 00000000000..6846da26b72
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "tts1",
+    "subtitle": "Subtitle-2-g",
+    "title_depth": 2,
+    "directory": "tts1\\Main-title\\Subtitle-2-g",
+    "parent_title": "Main-title",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/linuxmacos"
+    },
+    "previous_title": "Subtitle-1",
+    "next_title": "Subtitle-4-l&m",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/tts1/#subtitle-2-g"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
new file mode 100644
index 00000000000..b221f26074b
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
@@ -0,0 +1,3 @@
+blablabla Linux macOS
+blablablabla Linux macOS
+blablabla generic with a link
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
new file mode 100644
index 00000000000..4e167b116d2
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "tts1",
+    "subtitle": "Subtitle-4-l&m",
+    "title_depth": 3,
+    "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m",
+    "parent_title": "Subtitle-2-g",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/generic"
+    },
+    "previous_title": "Subtitle-2-g",
+    "next_title": "Subtitle-5-g",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/tts1/#subtitle-4-lm"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
new file mode 100644
index 00000000000..f9f20592832
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
@@ -0,0 +1,4 @@
+blablabla generic
+blablabla generic
+blablabla windows
+blablabla windows with a link
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
new file mode 100644
index 00000000000..c4620a94080
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "tts1",
+    "subtitle": "Subtitle-2-g",
+    "title_depth": 2,
+    "directory": "tts1\\Main-title\\Subtitle-2-g",
+    "parent_title": "Main-title",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/windows"
+    },
+    "previous_title": "Subtitle-1",
+    "next_title": "Subtitle-3-w",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/tts1/#subtitle-2-g"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt
new file mode 100644
index 00000000000..0b587cef85a
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt
@@ -0,0 +1,3 @@
+blablabla windows
+blablablabla windows
+blablabla generic with a link
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json
new file mode 100644
index 00000000000..aa4b6317ce6
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "tts1",
+    "subtitle": "Subtitle-3-w",
+    "title_depth": 3,
+    "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-3-w",
+    "parent_title": "Subtitle-2-g",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/generic"
+    },
+    "previous_title": "Subtitle-2-g",
+    "next_title": "Subtitle-5-g",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/tts1/#subtitle-3-w"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md
similarity index 76%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md
index 5065852e2a1..2f3ad7f9c08 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md
@@ -5,27 +5,27 @@
 blablabla
 blablablabla
 
-### Subtitle 2 partly generic
+## Subtitle 2 g
 
 blablabla generic
 blablabla generic
 {% if OS == windows %}blablabla windows
 blablabla windows with a [link](windows.md)
 
-#### Subtitle 3 Windows specific
+### Subtitle 3 w
 
 blablabla windows
 blablablabla windows
 {% else %}blablabla Linux macOS
 blablablabla Linux macOS with a [link](linuxmacos.md)
 
-#### Subtitle 4 Linux and macOS specific
+### Subtitle 4 l&m
 
 blablabla Linux macOS
 blablablabla Linux macOS
 {% endif %}
 blablabla generic with a [link](generic.md)
 
-## Subtitle 5 generic
+## Subtitle 5 g
 
 blablabla

From 56543f03ddbba5df7477e78468c8a9e46e92f227 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 13:41:23 +0200
Subject: [PATCH 122/152] small fix for double title bug

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index a041160c855..1530eedf31c 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -839,7 +839,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
         if len(jinja_text) != 0:
 
             # add first subtitle in front of section again
-            if options[SPLIT_ON_TITLES] or metadata[SUBTITLE].replace("-", " ") not in jinja_text[:len(metadata[SUBTITLE]) + 1]:
+            if options[SPLIT_ON_TITLES] or metadata[SUBTITLE] not in make_valid_title(jinja_text[:len(metadata[SUBTITLE]) + 1]):
                 jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE].replace("-", " ") + "\n" + jinja_text
             else:
                 jinja_text = "#" * metadata[TITLE_DEPTH] + " " + jinja_text

From 52a3861bec953f687c6317a1e180f9c27124d304 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 13:44:13 +0200
Subject: [PATCH 123/152] added examples of output of the script when splitting
 on paragraphs with a min_paragraph_length of 683

---
 .../generic/account/account_paragraph_1.txt   | 13 +++++++
 .../generic/account/account_paragraph_10.txt  | 19 ++++++++++
 .../account_paragraph_10_metadata.json        | 11 ++++++
 .../generic/account/account_paragraph_12.txt  | 17 +++++++++
 .../account_paragraph_12_metadata.json        | 11 ++++++
 .../account/account_paragraph_1_metadata.json | 14 ++++++++
 .../generic/account/account_paragraph_2.txt   |  6 ++++
 .../account/account_paragraph_2_metadata.json | 16 +++++++++
 .../generic/account/account_paragraph_3.txt   | 11 ++++++
 .../account/account_paragraph_3_metadata.json | 11 ++++++
 .../generic/account/account_paragraph_8.txt   | 13 +++++++
 .../account/account_paragraph_8_metadata.json | 11 ++++++
 .../connecting/connecting_paragraph_1.txt     | 13 +++++++
 .../connecting/connecting_paragraph_14.txt    |  7 ++++
 .../connecting_paragraph_14_metadata.json     | 14 ++++++++
 .../connecting/connecting_paragraph_15.txt    | 12 +++++++
 .../connecting_paragraph_15_metadata.json     | 15 ++++++++
 .../connecting_paragraph_1_metadata.json      | 14 ++++++++
 .../connecting/connecting_paragraph_2.txt     | 14 ++++++++
 .../connecting_paragraph_2_metadata.json      | 11 ++++++
 .../connecting/connecting_paragraph_3.txt     | 12 +++++++
 .../connecting_paragraph_3_metadata.json      | 11 ++++++
 .../connecting/connecting_paragraph_6.txt     | 16 +++++++++
 .../connecting_paragraph_6_metadata.json      | 11 ++++++
 .../connecting/connecting_paragraph_7.txt     | 22 ++++++++++++
 .../connecting_paragraph_7_metadata.json      | 11 ++++++
 .../connecting/connecting_paragraph_8.txt     | 13 +++++++
 .../connecting_paragraph_8_metadata.json      | 14 ++++++++
 .../connecting/connecting_paragraph_9.txt     | 27 ++++++++++++++
 .../connecting_paragraph_9_metadata.json      | 11 ++++++
 .../account/account_linux_paragraph_11.1.txt  | 11 ++++++
 ...account_linux_paragraph_11.1_metadata.json | 14 ++++++++
 .../account/account_linux_paragraph_4.1.txt   | 10 ++++++
 .../account_linux_paragraph_4.1_metadata.json | 14 ++++++++
 .../account/account_linux_paragraph_5.1.txt   | 14 ++++++++
 .../account_linux_paragraph_5.1_metadata.json | 11 ++++++
 .../account/account_linux_paragraph_5.2.txt   | 13 +++++++
 .../account_linux_paragraph_5.2_metadata.json | 11 ++++++
 .../account/account_linux_paragraph_5.3.txt   | 17 +++++++++
 .../account_linux_paragraph_5.3_metadata.json | 11 ++++++
 .../account/account_linux_paragraph_5.4.txt   | 18 ++++++++++
 .../account_linux_paragraph_5.4_metadata.json | 11 ++++++
 .../account/account_linux_paragraph_5.5.txt   |  6 ++++
 .../account_linux_paragraph_5.5_metadata.json | 11 ++++++
 .../account/account_linux_paragraph_6.1.txt   |  1 +
 .../account_linux_paragraph_6.1_metadata.json | 11 ++++++
 .../account/account_linux_paragraph_7.1.txt   | 14 ++++++++
 .../account_linux_paragraph_7.1_metadata.json | 14 ++++++++
 .../account/account_linux_paragraph_7.2.txt   |  8 +++++
 .../account_linux_paragraph_7.2_metadata.json | 11 ++++++
 .../account/account_linux_paragraph_9.1.txt   |  6 ++++
 .../account_linux_paragraph_9.1_metadata.json | 11 ++++++
 .../connecting_linux_paragraph_10.1.txt       | 35 +++++++++++++++++++
 ...necting_linux_paragraph_10.1_metadata.json | 11 ++++++
 .../connecting_linux_paragraph_11.1.txt       |  6 ++++
 ...necting_linux_paragraph_11.1_metadata.json | 11 ++++++
 .../connecting_linux_paragraph_12.1.txt       | 12 +++++++
 ...necting_linux_paragraph_12.1_metadata.json | 14 ++++++++
 .../connecting_linux_paragraph_12.2.txt       | 17 +++++++++
 ...necting_linux_paragraph_12.2_metadata.json | 11 ++++++
 .../connecting_linux_paragraph_12.3.txt       | 22 ++++++++++++
 ...necting_linux_paragraph_12.3_metadata.json | 11 ++++++
 .../connecting_linux_paragraph_12.4.txt       | 14 ++++++++
 ...necting_linux_paragraph_12.4_metadata.json | 11 ++++++
 .../connecting_linux_paragraph_12.5.txt       | 14 ++++++++
 ...necting_linux_paragraph_12.5_metadata.json | 11 ++++++
 .../connecting_linux_paragraph_12.6.txt       | 18 ++++++++++
 ...necting_linux_paragraph_12.6_metadata.json | 15 ++++++++
 .../connecting_linux_paragraph_13.1.txt       | 10 ++++++
 ...necting_linux_paragraph_13.1_metadata.json | 11 ++++++
 .../connecting_linux_paragraph_4.1.txt        |  7 ++++
 ...nnecting_linux_paragraph_4.1_metadata.json | 15 ++++++++
 .../connecting_linux_paragraph_5.1.txt        | 12 +++++++
 ...nnecting_linux_paragraph_5.1_metadata.json | 14 ++++++++
 .../connecting_linux_paragraph_5.2.txt        |  4 +++
 ...nnecting_linux_paragraph_5.2_metadata.json | 11 ++++++
 .../account/account_macos_paragraph_11.1.txt  | 11 ++++++
 ...account_macos_paragraph_11.1_metadata.json | 14 ++++++++
 .../account/account_macos_paragraph_4.1.txt   | 10 ++++++
 .../account_macos_paragraph_4.1_metadata.json | 14 ++++++++
 .../account/account_macos_paragraph_5.1.txt   | 12 +++++++
 .../account_macos_paragraph_5.1_metadata.json | 11 ++++++
 .../account/account_macos_paragraph_5.2.txt   | 13 +++++++
 .../account_macos_paragraph_5.2_metadata.json | 11 ++++++
 .../account/account_macos_paragraph_5.3.txt   | 20 +++++++++++
 .../account_macos_paragraph_5.3_metadata.json | 11 ++++++
 .../account/account_macos_paragraph_5.4.txt   | 18 ++++++++++
 .../account_macos_paragraph_5.4_metadata.json | 11 ++++++
 .../account/account_macos_paragraph_5.5.txt   |  6 ++++
 .../account_macos_paragraph_5.5_metadata.json | 11 ++++++
 .../account/account_macos_paragraph_6.1.txt   |  1 +
 .../account_macos_paragraph_6.1_metadata.json | 11 ++++++
 .../account/account_macos_paragraph_7.1.txt   | 14 ++++++++
 .../account_macos_paragraph_7.1_metadata.json | 14 ++++++++
 .../account/account_macos_paragraph_7.2.txt   |  7 ++++
 .../account_macos_paragraph_7.2_metadata.json | 11 ++++++
 .../account/account_macos_paragraph_9.1.txt   | 11 ++++++
 .../account_macos_paragraph_9.1_metadata.json | 11 ++++++
 .../connecting_macos_paragraph_10.1.txt       | 35 +++++++++++++++++++
 ...necting_macos_paragraph_10.1_metadata.json | 11 ++++++
 .../connecting_macos_paragraph_11.1.txt       |  6 ++++
 ...necting_macos_paragraph_11.1_metadata.json | 11 ++++++
 .../connecting_macos_paragraph_12.1.txt       | 12 +++++++
 ...necting_macos_paragraph_12.1_metadata.json | 14 ++++++++
 .../connecting_macos_paragraph_12.2.txt       | 17 +++++++++
 ...necting_macos_paragraph_12.2_metadata.json | 11 ++++++
 .../connecting_macos_paragraph_12.3.txt       | 22 ++++++++++++
 ...necting_macos_paragraph_12.3_metadata.json | 11 ++++++
 .../connecting_macos_paragraph_12.4.txt       | 14 ++++++++
 ...necting_macos_paragraph_12.4_metadata.json | 11 ++++++
 .../connecting_macos_paragraph_12.5.txt       | 14 ++++++++
 ...necting_macos_paragraph_12.5_metadata.json | 11 ++++++
 .../connecting_macos_paragraph_12.6.txt       | 18 ++++++++++
 ...necting_macos_paragraph_12.6_metadata.json | 15 ++++++++
 .../connecting_macos_paragraph_13.1.txt       | 12 +++++++
 ...necting_macos_paragraph_13.1_metadata.json | 11 ++++++
 .../connecting_macos_paragraph_13.2.txt       |  6 ++++
 ...necting_macos_paragraph_13.2_metadata.json | 11 ++++++
 .../connecting_macos_paragraph_4.1.txt        |  7 ++++
 ...nnecting_macos_paragraph_4.1_metadata.json | 15 ++++++++
 .../connecting_macos_paragraph_5.1.txt        | 10 ++++++
 ...nnecting_macos_paragraph_5.1_metadata.json | 14 ++++++++
 .../connecting_macos_paragraph_5.2.txt        |  7 ++++
 ...nnecting_macos_paragraph_5.2_metadata.json | 11 ++++++
 .../account_windows_paragraph_11.1.txt        | 11 ++++++
 ...count_windows_paragraph_11.1_metadata.json | 14 ++++++++
 .../account/account_windows_paragraph_4.1.txt | 14 ++++++++
 ...ccount_windows_paragraph_4.1_metadata.json | 14 ++++++++
 .../account/account_windows_paragraph_4.2.txt | 13 +++++++
 ...ccount_windows_paragraph_4.2_metadata.json | 11 ++++++
 .../account/account_windows_paragraph_4.3.txt | 13 +++++++
 ...ccount_windows_paragraph_4.3_metadata.json | 15 ++++++++
 .../account/account_windows_paragraph_4.4.txt | 17 +++++++++
 ...ccount_windows_paragraph_4.4_metadata.json | 11 ++++++
 .../account/account_windows_paragraph_4.5.txt |  7 ++++
 ...ccount_windows_paragraph_4.5_metadata.json | 11 ++++++
 .../account/account_windows_paragraph_6.1.txt | 13 +++++++
 ...ccount_windows_paragraph_6.1_metadata.json | 11 ++++++
 .../account/account_windows_paragraph_6.2.txt | 11 ++++++
 ...ccount_windows_paragraph_6.2_metadata.json | 15 ++++++++
 .../account/account_windows_paragraph_6.3.txt |  5 +++
 ...ccount_windows_paragraph_6.3_metadata.json | 11 ++++++
 .../account/account_windows_paragraph_9.1.txt |  7 ++++
 ...ccount_windows_paragraph_9.1_metadata.json | 11 ++++++
 .../connecting_windows_paragraph_10.1.txt     |  5 +++
 ...cting_windows_paragraph_10.1_metadata.json | 11 ++++++
 .../connecting_windows_paragraph_11.1.txt     | 11 ++++++
 ...cting_windows_paragraph_11.1_metadata.json | 11 ++++++
 .../connecting_windows_paragraph_11.2.txt     | 19 ++++++++++
 ...cting_windows_paragraph_11.2_metadata.json | 11 ++++++
 .../connecting_windows_paragraph_11.3.txt     |  7 ++++
 ...cting_windows_paragraph_11.3_metadata.json | 11 ++++++
 .../connecting_windows_paragraph_4.1.txt      | 11 ++++++
 ...ecting_windows_paragraph_4.1_metadata.json | 15 ++++++++
 .../connecting_windows_paragraph_4.2.txt      | 13 +++++++
 ...ecting_windows_paragraph_4.2_metadata.json | 11 ++++++
 .../connecting_windows_paragraph_4.3.txt      | 13 +++++++
 ...ecting_windows_paragraph_4.3_metadata.json | 14 ++++++++
 .../connecting_windows_paragraph_4.4.txt      | 11 ++++++
 ...ecting_windows_paragraph_4.4_metadata.json | 11 ++++++
 160 files changed, 1976 insertions(+)
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json

diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt
new file mode 100644
index 00000000000..1b79fd22391
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt
@@ -0,0 +1,13 @@
+Getting an HPC Account
+Getting ready to request an account
+All users of AUGent can request
+an
+account on the HPC, which is part of the Flemish Supercomputing Centre (VSC).
+See HPC policies for more information on who is entitled to an account.
+The VSC, abbreviation of Flemish Supercomputer Centre, is a virtual
+supercomputer centre. It is a partnership between the five Flemish
+associations: the Association KU Leuven, Ghent University Association,
+Brussels University Association, Antwerp University Association and the
+University Colleges-Limburg. The VSC is funded by the Flemish
+Government.
+There are two methods for connecting to HPC-UGent infrastructure:
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt
new file mode 100644
index 00000000000..371dd9db52b
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt
@@ -0,0 +1,19 @@
+Welcome e-mail
+Within one day, you should receive a Welcome e-mail with your VSC
+account details.
+Dear (Username), 
+Your VSC-account has been approved by an administrator.
+Your vsc-username is vsc40000
+Your account should be fully active within one hour.
+To check or update your account information please visit
+https://account.vscentrum.be/
+For further info please visit https://www.vscentrum.be/user-portal
+Kind regards,
+-- The VSC administrators
+
+Now, you can start using the HPC. You can always look up your VSC id later
+by visiting <https://account.vscentrum.be>.
+Adding multiple SSH public keys (optional)
+In case you are connecting from different computers to the login nodes,
+it is advised to use separate SSH public keys to do so. You should
+follow these steps.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json
new file mode 100644
index 00000000000..4b5b5202d1c
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Adding-multiple-SSH-public-keys-(optional)",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "",
+    "previous_title": "account_paragraph_9",
+    "next_title": "account_paragraph_11",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/account/#adding-multiple-ssh-public-keys-optional"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt
new file mode 100644
index 00000000000..6ee6880838e
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt
@@ -0,0 +1,17 @@
+5.  Take into account that it will take some time before the new SSH
+    public key is active in your account on the system; waiting for
+    15-30 minutes should be sufficient.
+Computation Workflow on the HPC
+A typical Computation workflow will be:
+1.  Connect to the HPC
+2.  Transfer your files to the HPC
+3.  Compile your code and test it
+4.  Create a job script
+5.  Submit your job
+6.  Wait while
+    1.  your job gets into the queue
+    2.  your job gets executed
+    3.  your job finishes
+7.  Move your results
+We'll take you through the different tasks one by one in the following
+chapters.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json
new file mode 100644
index 00000000000..a5df035df49
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Computation-Workflow-on-the-HPC",
+    "title_depth": 2,
+    "directory": "account",
+    "parent_title": "",
+    "previous_title": "account_paragraph_11",
+    "next_title": null,
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/account/#computation-workflow-on-the-hpc"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json
new file mode 100644
index 00000000000..726ce9f94fa
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "account",
+    "subtitle": "Getting-ready-to-request-an-account",
+    "title_depth": 2,
+    "directory": "account",
+    "links": {
+        "0": "../sites/hpc_policies"
+    },
+    "parent_title": "",
+    "previous_title": null,
+    "next_title": "account_paragraph_2",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/account/#getting-ready-to-request-an-account"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt
new file mode 100644
index 00000000000..6ecd65e2184
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt
@@ -0,0 +1,6 @@
+- Using a terminal to connect via SSH.
+- Using the web portal
+The web portal offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required).
+If you would like to use a terminal with SSH, as this gives you more flexibility, continue reading.
+However if you prefer to use the web portal, you can skip ahead to the following section: Applying for the account.
+Once you have successfully obtained an account, you can then delve into the details of utilizing the HPC-UGent web portal by reading Using the HPC-UGent web portal.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json
new file mode 100644
index 00000000000..257f886c6e0
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json
@@ -0,0 +1,16 @@
+{
+    "main_title": "account",
+    "subtitle": "Getting-ready-to-request-an-account",
+    "title_depth": 2,
+    "directory": "account",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/web_portal",
+        "1": "https://docs.hpc.ugent.be/account/#applying-for-the-account",
+        "2": "https://docs.hpc.ugent.be/web_portal"
+    },
+    "parent_title": "",
+    "previous_title": "account_paragraph_1",
+    "next_title": "account_paragraph_3",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/account/#getting-ready-to-request-an-account"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt
new file mode 100644
index 00000000000..9632ef1f5af
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt
@@ -0,0 +1,11 @@
+The HPC-UGent infrastructure clusters use public/private key pairs for user authentication
+(rather than passwords). Technically, the private key is stored on your
+local computer and always stays there; the public key is stored on the HPC.
+Access to the HPC is granted to anyone who can prove to have access to the
+corresponding private key on his local computer.
+How do SSH keys work?
+-   an SSH public/private key pair can be seen as a lock and a key
+-   the SSH public key is equivalent with a lock: you give it to the
+    VSC and they put it on the door that gives access to your account.
+-   the SSH private key is like a physical key: you don't hand it out
+    to other people.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json
new file mode 100644
index 00000000000..b94f233779b
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "How-do-SSH-keys-work",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "",
+    "previous_title": "account_paragraph_2",
+    "next_title": "account_paragraph_4",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/account/#how-do-ssh-keys-work"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt
new file mode 100644
index 00000000000..125b566419a
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt
@@ -0,0 +1,13 @@
+Applying for the account
+Visit <https://account.vscentrum.be/>
+You will be redirected to our WAYF (Where Are You From) service where
+you have to select your "Home Organisation".
+Select "UGent" in the dropdown box and optionally select "Save my preference"
+and "permanently".
+Click "Confirm"
+You will now be taken to the authentication page of your institute.
+After you log in using your UGent login and password, you will be asked to
+upload the file that contains your public key, i.e., the file
+"id_rsa.pub" which you have generated earlier. Make sure that your
+public key is actually accepted for upload, because if it is in a wrong
+format, wrong type or too short, then it will be refused.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json
new file mode 100644
index 00000000000..6d186b6ff46
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Applying-for-the-account",
+    "title_depth": 2,
+    "directory": "account",
+    "parent_title": "",
+    "previous_title": "account_paragraph_7",
+    "next_title": "account_paragraph_9",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/account/#applying-for-the-account"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt
new file mode 100644
index 00000000000..b144712c9df
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt
@@ -0,0 +1,13 @@
+Connecting to the HPC infrastructure
+Before you can really start using the HPC clusters, there are several things
+you need to do or know:
+1.  You need to log on to the cluster using an SSH client to one of
+    the login nodes or by using the HPC web portal.
+    This will give you command-line access.
+    A standard web browser like Firefox or Chrome for the web portal will suffice.
+2.  Before you can do some work, you'll have to transfer the files
+    that you need from your desktop computer to the cluster. At the end
+    of a job, you might want to transfer some files back.
+3.  Optionally, if you wish to use programs with a **graphical user
+    interface**, you will need an X-server on your client system and log
+    in to the login nodes with X-forwarding enabled.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt
new file mode 100644
index 00000000000..df00d4ed2a4
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt
@@ -0,0 +1,7 @@
+Fast file transfer for large datasets
+See the section on rsync in chapter 5 of the Linux intro manual.
+Changing login nodes
+It can be useful to have control over which login node you are on. However, when you connect to the HPC (High-Performance Computing) system, you are directed to a random login node, which might not be the one where you already have an active session. To address this, there is a way to manually switch your active login node.
+For instance, if you want to switch to the login node named gligar07.gastly.os, you can use the following command while you are connected to the gligar08.gastly.os login node on the HPC:
+ssh gligar07.gastly.os
+This is also possible the other way around.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json
new file mode 100644
index 00000000000..0543efa4083
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Changing-login-nodes",
+    "title_depth": 2,
+    "directory": "connecting",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/connecting/../linux-tutorial/uploading_files/#copying-faster-with-rsync"
+    },
+    "parent_title": "",
+    "previous_title": "connecting_paragraph_13",
+    "next_title": "connecting_paragraph_15",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt
new file mode 100644
index 00000000000..b2197618647
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt
@@ -0,0 +1,12 @@
+If you want to find out which login host you are connected to, you can use the hostname command.
+$ hostname
+gligar07.gastly.os
+$ ssh gligar08.gastly.os
+$ hostname
+gligar08.gastly.os
+
+Rather than always starting a new session on the HPC, you can also use a terminal multiplexer like screen or tmux.
+These can make sessions that 'survive' across disconnects.
+You can find more information on how to use these tools here (or on other online sources):
+- screen
+- tmux
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json
new file mode 100644
index 00000000000..d23146ed79f
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Changing-login-nodes",
+    "title_depth": 2,
+    "directory": "connecting",
+    "links": {
+        "0": "https://www.howtogeek.com/662422/how-to-use-linuxs-screen-command/",
+        "1": "https://www.howtogeek.com/671422/how-to-use-tmux-on-linux-and-why-its-better-than-screen/"
+    },
+    "parent_title": "",
+    "previous_title": "connecting_paragraph_14",
+    "next_title": null,
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json
new file mode 100644
index 00000000000..ef0bc5473b0
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Connecting-to-the-HPC-infrastructure",
+    "title_depth": 1,
+    "directory": "connecting",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/web_portal"
+    },
+    "parent_title": "",
+    "previous_title": null,
+    "next_title": "connecting_paragraph_2",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/connecting/#connecting-to-the-hpc-infrastructure"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt
new file mode 100644
index 00000000000..4c1d879b954
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt
@@ -0,0 +1,14 @@
+4.  Often several versions of software packages and libraries are
+    installed, so you need to select the ones you need. To manage
+    different versions efficiently, the VSC clusters use so-called
+    modules, so you will need to select and load the modules that
+    you need.
+Connection restrictions
+Since March 20th 2020, restrictions are in place that limit from where
+you can connect to the VSC HPC infrastructure, in response to security
+incidents involving several European HPC centres.
+VSC login nodes are only directly accessible from within university
+networks, and from (most) Belgian commercial internet providers.
+All other IP domains are blocked by default. If you are connecting from
+an IP address that is not allowed direct access, you have the following
+options to get access to VSC login nodes:
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json
new file mode 100644
index 00000000000..39ee53fcf0b
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Connection-restrictions",
+    "title_depth": 2,
+    "directory": "connecting",
+    "parent_title": "",
+    "previous_title": "connecting_paragraph_1",
+    "next_title": "connecting_paragraph_3",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/connecting/#connection-restrictions"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt
new file mode 100644
index 00000000000..668a1e6df57
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt
@@ -0,0 +1,12 @@
+-   Use a VPN connection to connect to the UGent network (recommended).
+-   Whitelist your IP address automatically by accessing
+    <https://firewall.vscentrum.be> and log in with your UGent account.
+    -   While this web connection is active new SSH sessions can be
+        started.
+    -   Active SSH sessions will remain active even when this web page
+        is closed.
+-   Contact your HPC support team (via hpc@ugent.be) and ask them to whitelist your
+    IP range (e.g., for industry access, automated processes).
+Trying to establish an SSH connection from an IP address that does not
+adhere to these restrictions will result in an immediate failure to
+connect, with an error message like:
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json
new file mode 100644
index 00000000000..4dc75d7dcf3
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Connection-restrictions",
+    "title_depth": 2,
+    "directory": "connecting",
+    "parent_title": "",
+    "previous_title": "connecting_paragraph_2",
+    "next_title": "connecting_paragraph_4",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/connecting/#connection-restrictions"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt
new file mode 100644
index 00000000000..472991adada
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt
@@ -0,0 +1,16 @@
+Congratulations, you're on the HPC infrastructure now!
+To find out where you have landed you can print the current working directory:
+$ pwd
+/user/home/gent/vsc400/vsc40000
+
+Your new private home directory is "/user/home/gent/vsc400/vsc40000". Here you can create your own
+subdirectory structure, copy and prepare your applications, compile and
+test them and submit your jobs on the HPC.
+$ cd /apps/gent/tutorials
+$ ls
+Intro-HPC/
+
+This directory currently contains all training material for the Introduction to the HPC. More
+relevant training material to work with the HPC can always be added later in
+this directory.
+You can now explore the content of this directory with the "ls -l" (lists long) and the "cd" (change directory) commands:
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json
new file mode 100644
index 00000000000..1c7ae8ed267
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "title_depth": 2,
+    "directory": "connecting",
+    "parent_title": "",
+    "previous_title": "connecting_paragraph_5",
+    "next_title": "connecting_paragraph_7",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt
new file mode 100644
index 00000000000..35996afe4da
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt
@@ -0,0 +1,22 @@
+As we are interested in the use of the HPC, move further to Intro-HPC and explore the
+contents up to 2 levels deep:
+$ cd Intro-HPC
+$ tree -L 2
+.
+'-- examples
+    |-- Compiling-and-testing-your-software-on-the-HPC
+    |-- Fine-tuning-Job-Specifications
+    |-- Multi-core-jobs-Parallel-Computing
+    |-- Multi-job-submission
+    |-- Program-examples
+    |-- Running-batch-jobs
+    |-- Running-jobs-with-input
+    |-- Running-jobs-with-input-output-data
+    |-- example.pbs
+    '-- example.sh
+9 directories, 5 files
+
+This directory contains:
+1.  This HPC Tutorial (in either a Mac, Linux or Windows version).
+2.  An examples subdirectory, containing all the examples that you need in this
+    Tutorial, as well as examples that might be useful for your specific applications.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json
new file mode 100644
index 00000000000..709753e4dc4
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "title_depth": 2,
+    "directory": "connecting",
+    "parent_title": "",
+    "previous_title": "connecting_paragraph_6",
+    "next_title": "connecting_paragraph_8",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt
new file mode 100644
index 00000000000..096c74c1372
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt
@@ -0,0 +1,13 @@
+$ cd examples
+
+ tip
+    Typing cd ex followed by tab (the Tab-key) will generate the cd examples
+    command. Command-line completion (also tab completion) is a common feature of the bash command
+    line interpreter, in which the program automatically fills in partially
+    typed commands.
+ tip
+    For more exhaustive tutorials about Linux usage, see Appendix Useful Linux Commands
+The first action is to copy the contents of the HPC examples directory to
+your home directory, so that you have your own personal copy and that
+you can start using the examples. The "-r" option of the copy command
+will also copy the contents of the sub-directories "recursively".
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json
new file mode 100644
index 00000000000..0241e0bd6b9
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "connecting",
+    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "title_depth": 2,
+    "directory": "connecting",
+    "links": {
+        "0": "../useful_linux_commands"
+    },
+    "parent_title": "",
+    "previous_title": "connecting_paragraph_7",
+    "next_title": "connecting_paragraph_9",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt
new file mode 100644
index 00000000000..5a634e6bddc
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt
@@ -0,0 +1,27 @@
+$ cp -r /apps/gent/tutorials/Intro-HPC/examples ~/
+
+You can exit the connection at any time by entering:
+$ exit
+logout
+Connection to login.hpc.ugent.be closed.
+
+ tip "tip: Setting your Language right"
+    You may encounter a warning message similar to the following one during connecting:
+    perl: warning: Setting locale failed.
+    perl: warning: Please check that your locale settings:
+    LANGUAGE = (unset),
+    LC_ALL = (unset),
+    LC_CTYPE = "UTF-8",
+    LANG = (unset)
+        are supported and installed on your system.
+    perl: warning: Falling back to the standard locale ("C").
+    or any other error message complaining about the locale.
+    This means that the correct "locale" has not yet been properly specified on your local machine. Try:
+    LANG=
+    LC_COLLATE="C"
+    LC_CTYPE="UTF-8"
+    LC_MESSAGES="C"
+    LC_MONETARY="C"
+    LC_NUMERIC="C"
+    LC_TIME="C"
+    LC_ALL=
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json
new file mode 100644
index 00000000000..40b04f24e9f
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "title_depth": 2,
+    "directory": "connecting",
+    "parent_title": "",
+    "previous_title": "connecting_paragraph_8",
+    "next_title": "connecting_paragraph_10",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt
new file mode 100644
index 00000000000..3a46897bdee
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt
@@ -0,0 +1,11 @@
+Adding multiple SSH public keys (optional)
+1.  Create a new public/private SSH key pair from the new computer.
+    Repeat the process described in
+    section Generate a public/private key pair with OpenSSH.
+2.  Go to <https://account.vscentrum.be/django/account/edit>
+3.  Upload the new SSH public key using the Add public key section. Make sure that your
+    public key is actually saved, because a public key will be refused
+    if it is too short, wrong type, or in a wrong format.
+4.  (optional) If you lost your key, you can delete the old key on the
+    same page. You should keep at least one valid public SSH key in your
+    account.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json
new file mode 100644
index 00000000000..72b9f92061c
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "account",
+    "subtitle": "Adding-multiple-SSH-public-keys-(optional)",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Applying-for-the-account",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair-with-openssh"
+    },
+    "previous_title": "account_paragraph_10",
+    "next_title": "account_paragraph_12",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#adding-multiple-ssh-public-keys-optional"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt
new file mode 100644
index 00000000000..1395e2ee7bd
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt
@@ -0,0 +1,10 @@
+How do SSH keys work
+-   anyone who has the key (and the optional password) can unlock the
+    door and log in to the account.
+-   the door to your VSC account is special: it can have multiple
+    locks (SSH public keys) attached to it, and you only need to open
+    one lock with the corresponding key (SSH private key) to open
+    the door (log in to the account).
+Since all VSC clusters use Linux as their main operating system, you
+will need to get acquainted with using the command-line interface and
+using the terminal (see tutorial).
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json
new file mode 100644
index 00000000000..52e1569a8a7
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "account",
+    "subtitle": "How-do-SSH-keys-work",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "links": {
+        "0": "../../linux-tutorial"
+    },
+    "previous_title": "account_paragraph_3",
+    "next_title": "account_paragraph_5",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#how-do-ssh-keys-work"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt
new file mode 100644
index 00000000000..caaaea5ee91
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt
@@ -0,0 +1,14 @@
+How do SSH keys work
+Launch a terminal from your desktop's application menu and you will see
+the bash shell. There are other shells, but most Linux distributions use
+bash by default.
+Test OpenSSH
+Secure Shell (ssh) is a cryptographic network protocol for secure data
+communication, remote command-line login, remote command execution, and
+other secure network services between two networked computers. In short,
+ssh provides a secure connection between 2 computers via insecure
+channels (Network, Internet, telephone lines, ...).
+"Secure" means that:
+1.  the User is authenticated to the System; and
+2.  the System is authenticated to the User; and
+3.  all data is encrypted during transfer.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json
new file mode 100644
index 00000000000..4636f13a4b4
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Test-OpenSSH",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_paragraph_4",
+    "next_title": "account_linux_paragraph_5.2",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#test-openssh"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt
new file mode 100644
index 00000000000..a166dd14503
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt
@@ -0,0 +1,13 @@
+OpenSSH is a FREE implementation of the SSH connectivity protocol. Linux comes
+with its own implementation of OpenSSH, so you don't need to install any
+third-party software to use it. Just open a terminal window and jump in!
+On all popular Linux distributions, the OpenSSH software is readily
+available, and most often installed by default. You can check whether
+the OpenSSH software is installed by opening a terminal and typing:
+$ ssh -V
+OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017
+To access the clusters and transfer your files, you will use the
+following commands:
+1.  ssh-keygen: to generate the SSH key pair (public + private key);
+2.  ssh: to open a shell on a remote machine;
+3.  sftp: a secure equivalent of ftp;
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json
new file mode 100644
index 00000000000..ca9c4c7dc1d
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Test-OpenSSH",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_linux_paragraph_5.1",
+    "next_title": "account_linux_paragraph_5.3",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#test-openssh"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt
new file mode 100644
index 00000000000..2e8fe9e3a24
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt
@@ -0,0 +1,17 @@
+4.  scp: a secure equivalent of the remote copy command rcp.
+Generate a public/private key pair with OpenSSH
+A key pair might already be present in the default location inside your
+home directory. Therefore, we first check if a key is available with the
+"list short" ("ls") command:
+$ ls ~/.ssh
+If a key-pair is already available, you would normally get:
+authorized_keys     id_rsa      id_rsa.pub      known_hosts
+Otherwise, the command will show:
+ls: .ssh: No such file or directory
+You can recognise a public/private key pair when a pair of files has the
+same name except for the extension ".pub" added to one of them. In this
+particular case, the private key is "id_rsa" and public key is
+"id_rsa.pub". You may have multiple keys (not necessarily in the
+directory "~/.ssh") if you or your operating system requires this. Be
+aware that your existing key pair might be too short, or not the right
+type.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json
new file mode 100644
index 00000000000..d902f6a0838
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_linux_paragraph_5.2",
+    "next_title": "account_linux_paragraph_5.4",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt
new file mode 100644
index 00000000000..3cde4395d81
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt
@@ -0,0 +1,18 @@
+You will need to generate a new key pair, when:
+1.  you don't have a key pair yet
+2.  you forgot the passphrase protecting your private key
+3.  your private key was compromised
+4.  your key pair is too short or not the right type
+For extra security, the private key itself can be encrypted using a
+"passphrase", to prevent anyone from using your private key even when
+they manage to copy it. You have to "unlock" the private key by typing
+the passphrase. Be sure to never give away your private key, it is
+private and should stay private. You should not even copy it to one of
+your other machines, instead, you should create a new public/private key
+pair for each machine.
+$ ssh-keygen -t rsa -b 4096
+Generating public/private rsa key pair. Enter file in which to save the
+key (/home/user/.ssh/id_rsa): Enter passphrase (empty for no
+passphrase): Enter same passphrase again: Your identification has been
+saved in /home/user/.ssh/id_rsa. Your public key has been saved in
+/home/user/.ssh/id_rsa.pub.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json
new file mode 100644
index 00000000000..1edae26d97b
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_linux_paragraph_5.3",
+    "next_title": "account_linux_paragraph_5.5",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt
new file mode 100644
index 00000000000..78c142e82e0
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt
@@ -0,0 +1,6 @@
+This will ask you for a file name to store the private and public key,
+and a passphrase to protect your private key. It needs to be emphasised
+that you really should choose the passphrase wisely! The system will ask
+you for it every time you want to use the private key, that is, every time
+you want to access the cluster or transfer your files.
+Without your key pair, you won't be able to apply for a personal VSC account.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json
new file mode 100644
index 00000000000..29affc0335e
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_linux_paragraph_5.4",
+    "next_title": "account_paragraph_6",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt
new file mode 100644
index 00000000000..c3b395b5296
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt
@@ -0,0 +1 @@
+Using an SSH agent (optional)
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json
new file mode 100644
index 00000000000..acf12bc0a7d
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Using-an-SSH-agent-(optional)",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_paragraph_5",
+    "next_title": "account_paragraph_7",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt
new file mode 100644
index 00000000000..e3ef2176f09
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt
@@ -0,0 +1,14 @@
+Using an SSH agent (optional)
+Most recent Unix derivatives include by default an SSH agent ("gnome-keyring-daemon" in most cases)  
+to keep and manage the user SSH keys. If you use one of these derivatives you must include the new keys into
+the SSH manager keyring to be able to connect to the HPC cluster. If
+not, the SSH client will display an error message (see Connecting) similar to this:
+Agent admitted failure to sign using the key. 
+Permission denied (publickey,gssapi-keyex,gssapi-with-mic).
+This could be fixed using the ssh-add command. You can include the new
+private keys' identities in your keyring with:
+$ ssh-add
+ tip
+    Without extra options ssh-add adds any key located at $HOME/.ssh
+    directory, but you can specify the private key location path as
+    argument, as example: ssh-add /path/to/my/id_rsa.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json
new file mode 100644
index 00000000000..b6b1e052345
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "account",
+    "subtitle": "Using-an-SSH-agent-(optional)",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "links": {
+        "0": "../connecting"
+    },
+    "previous_title": "account_paragraph_6",
+    "next_title": "account_linux_paragraph_7.2",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt
new file mode 100644
index 00000000000..93019fa1a6a
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt
@@ -0,0 +1,8 @@
+Check that your key is available from the keyring with:
+$ ssh-add -l
+After these changes the key agent will keep your SSH key to connect to
+the clusters as usual.
+ tip
+    You should execute ssh-add command again if you generate a new SSH
+    key.
+Visit <https://wiki.gnome.org/Projects/GnomeKeyring/Ssh> for more information.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json
new file mode 100644
index 00000000000..35466be5b56
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Using-an-SSH-agent-(optional)",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_linux_paragraph_7.1",
+    "next_title": "account_paragraph_8",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt
new file mode 100644
index 00000000000..a9059b224bf
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt
@@ -0,0 +1,6 @@
+Applying for the account
+This file has been stored in the directory "~/.ssh/".
+After you have uploaded your public key you will receive an e-mail with
+a link to confirm your e-mail address. After confirming your e-mail
+address the VSC staff will review and if applicable approve your
+account.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json
new file mode 100644
index 00000000000..21988388723
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Applying-for-the-account",
+    "title_depth": 2,
+    "directory": "account",
+    "parent_title": "account",
+    "previous_title": "account_paragraph_8",
+    "next_title": "account_paragraph_10",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#applying-for-the-account"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt
new file mode 100644
index 00000000000..3e588c709d4
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt
@@ -0,0 +1,35 @@
+First Time connection to the HPC infrastructure
+    A locale is a set of parameters that defines the user's language, country and
+    any special variant preferences that the user wants to see in their user
+    interface. Usually a locale identifier consists of at least a language
+    identifier and a region identifier.
+    Open the .bashrc on your local machine with your favourite editor and
+    add the following lines:
+    
+    $ nano ~/.bashrc
+    ...
+    export LANGUAGE="en_US.UTF-8"
+    export LC_ALL="en_US.UTF-8"
+    export LC_CTYPE="en_US.UTF-8"
+    export LANG="en_US.UTF-8"
+    ...
+    
+    
+     tip "tip: vi"
+        To start entering text in vi: move to the place you want to start
+        entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq"
+        To exit vi without saving your changes, enter ""ESC":q!"
+    
+    
+    or alternatively (if you are not comfortable with the Linux editors),
+    again on your local machine:
+    
+    $ echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile
+    $ echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile
+    $ echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile
+    $ echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile
+    
+    
+    You can now log out, open a new terminal/shell on your local machine and
+    reconnect to the login node, and you should not get these warnings anymore.
+    
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json
new file mode 100644
index 00000000000..364c81834cf
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "title_depth": 2,
+    "directory": "connecting",
+    "parent_title": "Connecting-to-the-HPC-infrastructure",
+    "previous_title": "connecting_paragraph_9",
+    "next_title": "connecting_paragraph_11",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt
new file mode 100644
index 00000000000..d872c89a0f8
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt
@@ -0,0 +1,6 @@
+Transfer Files to/from the HPC
+Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back.
+The preferred way to transfer files is by using an scp or sftp via the
+secure OpenSSH protocol. Linux ships with an implementation of OpenSSH, so you
+don't need to install any third-party software to use it. Just open a
+terminal window and jump in!
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json
new file mode 100644
index 00000000000..420f73742f5
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Transfer-Files-tofrom-the-HPC",
+    "title_depth": 2,
+    "directory": "connecting",
+    "parent_title": "Connecting-to-the-HPC-infrastructure",
+    "previous_title": "connecting_paragraph_10",
+    "next_title": "connecting_paragraph_12",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#transfer-files-tofrom-the-hpc"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt
new file mode 100644
index 00000000000..8d0031fcca9
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt
@@ -0,0 +1,12 @@
+Transfer Files to/from the HPC
+Using scp
+Secure copy or SCP is a tool (command) for securely transferring files between a local
+host (= your computer) and a remote host (the HPC). It is based on the
+Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e.,
+copy) command, but can copy files to or from remote machines.
+It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if
+you have symlinks to them in your home directory. See 
+the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux
+ for how to do this.
+Open an additional terminal window and check that you're working on your
+local machine.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json
new file mode 100644
index 00000000000..19eba778d90
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-scp",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch"
+    },
+    "previous_title": "connecting_paragraph_11",
+    "next_title": "connecting_linux_paragraph_12.2",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt
new file mode 100644
index 00000000000..f1da0677a67
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt
@@ -0,0 +1,17 @@
+$ hostname
+<local-machine-name>
+If you're still using the terminal that is connected to the HPC, close the
+connection by typing "exit" in the terminal window.
+For example, we will copy the (local) file "localfile.txt" to your
+home directory on the HPC cluster. We first generate a small dummy
+"localfile.txt", which contains the word "Hello". Use your own VSC
+account, which is something like "vsc40000". Don't forget the colon (:) at the
+end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your
+local filesystem. You can even specify where to save the file on the
+remote filesystem by putting a path after the colon.
+$ echo "Hello" > localfile.txt
+$ ls -l 
+...
+-rw-r--r-- 1 user  staff   6 Sep 18 09:37 localfile.txt
+$ scp localfile.txt vsc40000@login.hpc.ugent.be:
+localfile.txt     100%   6     0.0KB/s     00:00
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json
new file mode 100644
index 00000000000..0b3a3418c55
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-scp",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "previous_title": "connecting_linux_paragraph_12.1",
+    "next_title": "connecting_linux_paragraph_12.3",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt
new file mode 100644
index 00000000000..9585900e356
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt
@@ -0,0 +1,22 @@
+Connect to the HPC via another terminal, print the working directory (to
+make sure you're in the home directory) and check whether the file has
+arrived:
+$ pwd
+/user/home/gent/vsc400/vsc40000
+$ ls -l 
+total 1536
+drwxrwxr-x 2
+drwxrwxr-x 2
+drwxrwxr-x 10
+-rw-r--r-- 1
+$ cat localfile.txt
+Hello
+The scp command can also be used to copy files from the cluster to your
+local machine. Let us copy the remote file "intro-HPC--Gent.pdf" from your "docs"
+subdirectory on the cluster to your local computer.
+First, we will confirm that the file is indeed in the "docs"
+subdirectory. In the terminal on the login node, enter:
+$ cd ~/docs
+$ ls -l
+total 1536
+-rw-r--r-- 1 vsc40000 Sep 11 09:53 intro-HPC--Gent.pdf
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json
new file mode 100644
index 00000000000..5624749ede8
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-scp",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "previous_title": "connecting_linux_paragraph_12.2",
+    "next_title": "connecting_linux_paragraph_12.4",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt
new file mode 100644
index 00000000000..2664953ed0c
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt
@@ -0,0 +1,14 @@
+Now we will copy the file to the local machine. On the terminal on your
+own local computer, enter:
+$ scp vsc40000@login.hpc.ugent.be:./docs/intro-HPC--Gent.pdf .
+intro-HPC--Gent.pdf 100% 725KB 724.6KB/s 00:01
+$ ls -l
+total 899
+-rw-r--r-- 1 user staff 741995 Sep 18 09:53
+-rw-r--r-- 1 user staff      6 Sep 18 09:37 localfile.txt
+The file has been copied from the HPC to your local computer.
+It's also possible to copy entire directories (and their contents) with
+the -r flag. For example, if we want to copy the local directory
+dataset to $VSC_SCRATCH, we can use the following command (assuming
+you've created the scratch symlink):
+$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json
new file mode 100644
index 00000000000..5a401911cab
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-scp",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "previous_title": "connecting_linux_paragraph_12.3",
+    "next_title": "connecting_linux_paragraph_12.5",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt
new file mode 100644
index 00000000000..51d39b548c3
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt
@@ -0,0 +1,14 @@
+If you don't use the -r option to copy a directory, you will run into
+the following error:
+$ scp dataset vsc40000@login.hpc.ugent.be:scratch
+dataset: not a regular file
+Using sftp
+The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file
+transfer and file management functionalities over any reliable data
+stream. It was designed as an extension of the Secure Shell protocol
+(SSH) version 2.0. This protocol assumes that it is run over a secure
+channel, such as SSH, that the server has already authenticated the
+client, and that the identity of the client user is available to the
+protocol.
+The sftp is an equivalent of the ftp command, with the difference that
+it uses the secure ssh protocol to connect to the clusters.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json
new file mode 100644
index 00000000000..a479f66e7e0
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-sftp",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "previous_title": "connecting_linux_paragraph_12.4",
+    "next_title": "connecting_linux_paragraph_12.6",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt
new file mode 100644
index 00000000000..4ae257101f1
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt
@@ -0,0 +1,18 @@
+One easy way of starting a sftp session is
+$ sftp vsc40000@login.hpc.ugent.be
+Typical and popular commands inside an sftp session are:
+|                       |                                                                                      |
+|:--------------------------|:-------------------------------------------------------------------------------------|
+| cd ~/examples/fibo    | Move to the examples/fibo subdirectory on the remote machine (i.e., the HPC)   |
+| ls                    | Get a list of the files in the current directory on the HPC.                   |
+| get fibo.py           | Copy the file "fibo.py" from the HPC                                           |
+| get tutorial/HPC.pdf  | Copy the file "HPC.pdf" from the HPC, which is in the "tutorial" subdirectory. |
+| lcd test              | Move to the "test" subdirectory on your local machine.                               |
+| lcd ..                | Move up one level in the local directory.                                            |
+| lls                   | Get local directory listing.                                                         |
+| put test.py           | Copy the local file test.py to the HPC.                                        |
+| put test1.py test2.py | Copy the local file test1.py to the HPC and rename it to test2.py.                   |
+| bye                   | Quit the sftp session                                                                |
+| **mget *.cc**             | Copy all the remote files with extension ".cc" to the local directory.               |
+| **mput *.h**              | Copy all the local files with extension ".h" to the HPC.                       |
+|                       |                                                                                      |
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json
new file mode 100644
index 00000000000..9c744fd5133
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-sftp",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "links": {
+        "0": "",
+        "1": ""
+    },
+    "previous_title": "connecting_linux_paragraph_12.5",
+    "next_title": "connecting_linux_paragraph_12.7",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt
new file mode 100644
index 00000000000..a0496edfb14
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt
@@ -0,0 +1,10 @@
+Transfer Files to/from the HPC
+Using a GUI
+If you prefer a GUI to transfer files back and forth to the HPC, you can
+use your file browser. Open your file browser and press
+++"Ctrl"+"l"++
+This should open up an address bar where you can enter a URL.
+Alternatively, look for the "connect to server" option in your file
+browser's menu.
+Enter: sftp://vsc40000@login.hpc.ugent.be/ and press enter.
+You should now be able to browse files on the HPC in your file browser.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json
new file mode 100644
index 00000000000..d634a356654
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-a-GUI",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "previous_title": "connecting_paragraph_12",
+    "next_title": "connecting_paragraph_14",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-a-gui"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt
new file mode 100644
index 00000000000..773d03f0689
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt
@@ -0,0 +1,7 @@
+First Time connection to the HPC infrastructure
+ssh_exchange_identification: read: Connection reset by peer
+First Time connection to the HPC infrastructure
+The remaining content in this chapter is primarily intended for people using a terminal with SSH.
+If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal.
+If you have any issues connecting to the HPC after you've followed these
+steps, see Issues connecting to login node to troubleshoot.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json
new file mode 100644
index 00000000000..f6745fc31dc
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "connecting",
+    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "title_depth": 2,
+    "directory": "connecting",
+    "parent_title": "Connecting-to-the-HPC-infrastructure",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/web_portal",
+        "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node"
+    },
+    "previous_title": "connecting_paragraph_3",
+    "next_title": "connecting_paragraph_5",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt
new file mode 100644
index 00000000000..94d5d9500a3
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt
@@ -0,0 +1,12 @@
+First Time connection to the HPC infrastructure
+Connect
+Open up a terminal and enter the following command to connect to the HPC.
+$ ssh vsc40000@login.hpc.ugent.be
+Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login
+node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command.
+The first time you make a connection to the login node, you will be
+asked to verify the authenticity of the login node. Please check
+Warning message when first connecting to new host on how to do this.
+A possible error message you can get if you previously saved your
+private key somewhere else than the default location
+($HOME/.ssh/id_rsa):
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json
new file mode 100644
index 00000000000..05996eb5df2
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Connect",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host"
+    },
+    "previous_title": "connecting_paragraph_4",
+    "next_title": "connecting_linux_paragraph_5.2",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#connect"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt
new file mode 100644
index 00000000000..312fe885cb0
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt
@@ -0,0 +1,4 @@
+Permission denied (publickey,gssapi-keyex,gssapi-with-mic).
+In this case, use the -i option for the ssh command to specify the
+location of your private key. For example:
+$ ssh -i /home/example/my_keys
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json
new file mode 100644
index 00000000000..85a826e41a3
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Connect",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
+    "previous_title": "connecting_linux_paragraph_5.1",
+    "next_title": "connecting_paragraph_6",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#connect"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt
new file mode 100644
index 00000000000..3a46897bdee
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt
@@ -0,0 +1,11 @@
+Adding multiple SSH public keys (optional)
+1.  Create a new public/private SSH key pair from the new computer.
+    Repeat the process described in
+    section Generate a public/private key pair with OpenSSH.
+2.  Go to <https://account.vscentrum.be/django/account/edit>
+3.  Upload the new SSH public key using the Add public key section. Make sure that your
+    public key is actually saved, because a public key will be refused
+    if it is too short, wrong type, or in a wrong format.
+4.  (optional) If you lost your key, you can delete the old key on the
+    same page. You should keep at least one valid public SSH key in your
+    account.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json
new file mode 100644
index 00000000000..dd8b3400419
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "account",
+    "subtitle": "Adding-multiple-SSH-public-keys-(optional)",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Applying-for-the-account",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair-with-openssh"
+    },
+    "previous_title": "account_paragraph_10",
+    "next_title": "account_paragraph_12",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#adding-multiple-ssh-public-keys-optional"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt
new file mode 100644
index 00000000000..1395e2ee7bd
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt
@@ -0,0 +1,10 @@
+How do SSH keys work
+-   anyone who has the key (and the optional password) can unlock the
+    door and log in to the account.
+-   the door to your VSC account is special: it can have multiple
+    locks (SSH public keys) attached to it, and you only need to open
+    one lock with the corresponding key (SSH private key) to open
+    the door (log in to the account).
+Since all VSC clusters use Linux as their main operating system, you
+will need to get acquainted with using the command-line interface and
+using the terminal (see tutorial).
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json
new file mode 100644
index 00000000000..33d083958b9
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "account",
+    "subtitle": "How-do-SSH-keys-work",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "links": {
+        "0": "../../linux-tutorial"
+    },
+    "previous_title": "account_paragraph_3",
+    "next_title": "account_paragraph_5",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#how-do-ssh-keys-work"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt
new file mode 100644
index 00000000000..f3483fcaef1
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt
@@ -0,0 +1,12 @@
+How do SSH keys work
+To open a Terminal window in macOS, open the Finder and choose
+*\>\> Applications \> Utilities \> Terminal*
+Before requesting an account, you need to generate a pair of ssh keys.
+One popular way to do this on macOS is using the OpenSSH client included with macOS, which you can then also use to log on to the clusters.
+Test OpenSSH
+Secure Shell (ssh) is a cryptographic network protocol for secure data
+communication, remote command-line login, remote command execution, and
+other secure network services between two networked computers. In short,
+ssh provides a secure connection between 2 computers via insecure
+channels (Network, Internet, telephone lines, ...).
+"Secure" means that:
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json
new file mode 100644
index 00000000000..c75d6aede58
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Test-OpenSSH",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_paragraph_4",
+    "next_title": "account_macos_paragraph_5.2",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#test-openssh"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt
new file mode 100644
index 00000000000..5189a953002
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt
@@ -0,0 +1,13 @@
+1.  the User is authenticated to the System; and
+2.  the System is authenticated to the User; and
+3.  all data is encrypted during transfer.
+OpenSSH is a FREE implementation of the SSH connectivity protocol. macOS comes
+with its own implementation of OpenSSH, so you don't need to install any
+third-party software to use it. Just open a terminal window and jump in!
+On all popular Linux distributions, the OpenSSH software is readily
+available, and most often installed by default. You can check whether
+the OpenSSH software is installed by opening a terminal and typing:
+$ ssh -V
+OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017
+To access the clusters and transfer your files, you will use the
+following commands:
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json
new file mode 100644
index 00000000000..7f6c80a32f6
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Test-OpenSSH",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_macos_paragraph_5.1",
+    "next_title": "account_macos_paragraph_5.3",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#test-openssh"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt
new file mode 100644
index 00000000000..2c97d597425
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt
@@ -0,0 +1,20 @@
+1.  ssh-keygen: to generate the SSH key pair (public + private key);
+2.  ssh: to open a shell on a remote machine;
+3.  sftp: a secure equivalent of ftp;
+4.  scp: a secure equivalent of the remote copy command rcp.
+Generate a public/private key pair with OpenSSH
+A key pair might already be present in the default location inside your
+home directory. Therefore, we first check if a key is available with the
+"list short" ("ls") command:
+$ ls ~/.ssh
+If a key-pair is already available, you would normally get:
+authorized_keys     id_rsa      id_rsa.pub      known_hosts
+Otherwise, the command will show:
+ls: .ssh: No such file or directory
+You can recognise a public/private key pair when a pair of files has the
+same name except for the extension ".pub" added to one of them. In this
+particular case, the private key is "id_rsa" and public key is
+"id_rsa.pub". You may have multiple keys (not necessarily in the
+directory "~/.ssh") if you or your operating system requires this. Be
+aware that your existing key pair might be too short, or not the right
+type.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json
new file mode 100644
index 00000000000..7c0f0d2a04d
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_macos_paragraph_5.2",
+    "next_title": "account_macos_paragraph_5.4",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt
new file mode 100644
index 00000000000..3cde4395d81
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt
@@ -0,0 +1,18 @@
+You will need to generate a new key pair, when:
+1.  you don't have a key pair yet
+2.  you forgot the passphrase protecting your private key
+3.  your private key was compromised
+4.  your key pair is too short or not the right type
+For extra security, the private key itself can be encrypted using a
+"passphrase", to prevent anyone from using your private key even when
+they manage to copy it. You have to "unlock" the private key by typing
+the passphrase. Be sure to never give away your private key, it is
+private and should stay private. You should not even copy it to one of
+your other machines, instead, you should create a new public/private key
+pair for each machine.
+$ ssh-keygen -t rsa -b 4096
+Generating public/private rsa key pair. Enter file in which to save the
+key (/home/user/.ssh/id_rsa): Enter passphrase (empty for no
+passphrase): Enter same passphrase again: Your identification has been
+saved in /home/user/.ssh/id_rsa. Your public key has been saved in
+/home/user/.ssh/id_rsa.pub.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json
new file mode 100644
index 00000000000..346108200ac
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_macos_paragraph_5.3",
+    "next_title": "account_macos_paragraph_5.5",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt
new file mode 100644
index 00000000000..78c142e82e0
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt
@@ -0,0 +1,6 @@
+This will ask you for a file name to store the private and public key,
+and a passphrase to protect your private key. It needs to be emphasised
+that you really should choose the passphrase wisely! The system will ask
+you for it every time you want to use the private key, that is, every time
+you want to access the cluster or transfer your files.
+Without your key pair, you won't be able to apply for a personal VSC account.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json
new file mode 100644
index 00000000000..25baa1e073f
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_macos_paragraph_5.4",
+    "next_title": "account_paragraph_6",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt
new file mode 100644
index 00000000000..c3b395b5296
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt
@@ -0,0 +1 @@
+Using an SSH agent (optional)
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json
new file mode 100644
index 00000000000..b8931a423d3
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Using-an-SSH-agent-(optional)",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_paragraph_5",
+    "next_title": "account_paragraph_7",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt
new file mode 100644
index 00000000000..d204f4e4392
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt
@@ -0,0 +1,14 @@
+Using an SSH agent (optional)
+Most recent Unix derivatives include by default an SSH agent 
+to keep and manage the user SSH keys. If you use one of these derivatives you must include the new keys into
+the SSH manager keyring to be able to connect to the HPC cluster. If
+not, SSH client will display an error message (see Connecting) similar to this:
+Agent admitted failure to sign using the key. 
+Permission denied (publickey,gssapi-keyex,gssapi-with-mic).
+This could be fixed using the ssh-add command. You can include the new
+private keys' identities in your keyring with:
+$ ssh-add
+ tip
+    Without extra options ssh-add adds any key located at $HOME/.ssh
+    directory, but you can specify the private key location path as
+    argument, for example: ssh-add /path/to/my/id_rsa.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json
new file mode 100644
index 00000000000..c43391b146e
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "account",
+    "subtitle": "Using-an-SSH-agent-(optional)",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "links": {
+        "0": "../connecting"
+    },
+    "previous_title": "account_paragraph_6",
+    "next_title": "account_macos_paragraph_7.2",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt
new file mode 100644
index 00000000000..8fd93f6b4f6
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt
@@ -0,0 +1,7 @@
+Check that your key is available from the keyring with:
+$ ssh-add -l
+After these changes the key agent will keep your SSH key to connect to
+the clusters as usual.
+ tip
+    You should execute the ssh-add command again if you generate a new SSH
+    key.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json
new file mode 100644
index 00000000000..519b58bb151
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Using-an-SSH-agent-(optional)",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_macos_paragraph_7.1",
+    "next_title": "account_paragraph_8",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt
new file mode 100644
index 00000000000..d11380c2519
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt
@@ -0,0 +1,11 @@
+Applying for the account
+This file has been stored in the directory "~/.ssh/".
+ tip
+    As ".ssh" is an invisible directory, the Finder will not show it by
+    default. The easiest way to access the folder, is by pressing ++cmd+shift+g++ (or ++cmd+shift+"."++),
+    which will allow you to enter the name of a directory, which you would
+    like to open in Finder. Here, type "~/.ssh" and press enter.
+After you have uploaded your public key you will receive an e-mail with
+a link to confirm your e-mail address. After confirming your e-mail
+address the VSC staff will review and if applicable approve your
+account.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json
new file mode 100644
index 00000000000..6b6e8c72703
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Applying-for-the-account",
+    "title_depth": 2,
+    "directory": "account",
+    "parent_title": "account",
+    "previous_title": "account_paragraph_8",
+    "next_title": "account_paragraph_10",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#applying-for-the-account"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt
new file mode 100644
index 00000000000..3e588c709d4
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt
@@ -0,0 +1,35 @@
+First Time connection to the HPC infrastructure
+    A locale is a set of parameters that defines the user's language, country and
+    any special variant preferences that the user wants to see in their user
+    interface. Usually a locale identifier consists of at least a language
+    identifier and a region identifier.
+    Open the .bashrc on your local machine with your favourite editor and
+    add the following lines:
+    
+    $ nano ~/.bashrc
+    ...
+    export LANGUAGE="en_US.UTF-8"
+    export LC_ALL="en_US.UTF-8"
+    export LC_CTYPE="en_US.UTF-8"
+    export LANG="en_US.UTF-8"
+    ...
+    
+    
+     tip "tip: vi"
+        To start entering text in vi: move to the place you want to start
+        entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: "ESC" followed by ":wq"
+        To exit vi without saving your changes, enter "ESC" followed by ":q!"
+    
+    
+    or alternatively (if you are not comfortable with the Linux editors),
+    again on your local machine:
+    
+    $ echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile
+    $ echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile
+    $ echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile
+    $ echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile
+    
+    
+    You can now log out, open a new terminal/shell on your local machine and
+    reconnect to the login node, and you should not get these warnings anymore.
+    
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json
new file mode 100644
index 00000000000..4c6e5477119
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "title_depth": 2,
+    "directory": "connecting",
+    "parent_title": "Connecting-to-the-HPC-infrastructure",
+    "previous_title": "connecting_paragraph_9",
+    "next_title": "connecting_paragraph_11",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt
new file mode 100644
index 00000000000..d872c89a0f8
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt
@@ -0,0 +1,6 @@
+Transfer Files to/from the HPC
+Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back.
+The preferred way to transfer files is by using an scp or sftp via the
+secure OpenSSH protocol. macOS ships with an implementation of OpenSSH, so you
+don't need to install any third-party software to use it. Just open a
+terminal window and jump in!
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json
new file mode 100644
index 00000000000..1425455ade8
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Transfer-Files-tofrom-the-HPC",
+    "title_depth": 2,
+    "directory": "connecting",
+    "parent_title": "Connecting-to-the-HPC-infrastructure",
+    "previous_title": "connecting_paragraph_10",
+    "next_title": "connecting_paragraph_12",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#transfer-files-tofrom-the-hpc"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt
new file mode 100644
index 00000000000..8d0031fcca9
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt
@@ -0,0 +1,12 @@
+Transfer Files to/from the HPC
+Using scp
+Secure copy or SCP is a tool (command) for securely transferring files between a local
+host (= your computer) and a remote host (the HPC). It is based on the
+Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e.,
+copy) command, but can copy files to or from remote machines.
+It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if
+you have symlinks to them in your home directory. See 
+the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux
+ for how to do this.
+Open an additional terminal window and check that you're working on your
+local machine.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json
new file mode 100644
index 00000000000..332e6ed2996
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-scp",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch"
+    },
+    "previous_title": "connecting_paragraph_11",
+    "next_title": "connecting_macos_paragraph_12.2",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt
new file mode 100644
index 00000000000..f1da0677a67
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt
@@ -0,0 +1,17 @@
+$ hostname
+<local-machine-name>
+If you're still using the terminal that is connected to the HPC, close the
+connection by typing "exit" in the terminal window.
+For example, we will copy the (local) file "localfile.txt" to your
+home directory on the HPC cluster. We first generate a small dummy
+"localfile.txt", which contains the word "Hello". Use your own VSC
+account, which is something like "vsc40000". Don't forget the colon (:) at the
+end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your
+local filesystem. You can even specify where to save the file on the
+remote filesystem by putting a path after the colon.
+$ echo "Hello" > localfile.txt
+$ ls -l 
+...
+-rw-r--r-- 1 user  staff   6 Sep 18 09:37 localfile.txt
+$ scp localfile.txt vsc40000@login.hpc.ugent.be:
+localfile.txt     100%   6     0.0KB/s     00:00
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json
new file mode 100644
index 00000000000..d86cdd989ac
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-scp",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "previous_title": "connecting_macos_paragraph_12.1",
+    "next_title": "connecting_macos_paragraph_12.3",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt
new file mode 100644
index 00000000000..9585900e356
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt
@@ -0,0 +1,22 @@
+Connect to the HPC via another terminal, print the working directory (to
+make sure you're in the home directory) and check whether the file has
+arrived:
+$ pwd
+/user/home/gent/vsc400/vsc40000
+$ ls -l 
+total 1536
+drwxrwxr-x 2
+drwxrwxr-x 2
+drwxrwxr-x 10
+-rw-r--r-- 1
+$ cat localfile.txt
+Hello
+The scp command can also be used to copy files from the cluster to your
+local machine. Let us copy the remote file "intro-HPC--Gent.pdf" from your "docs"
+subdirectory on the cluster to your local computer.
+First, we will confirm that the file is indeed in the "docs"
+subdirectory. In the terminal on the login node, enter:
+$ cd ~/docs
+$ ls -l
+total 1536
+-rw-r--r-- 1 vsc40000 Sep 11 09:53 intro-HPC--Gent.pdf
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json
new file mode 100644
index 00000000000..4fcc42d2337
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-scp",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "previous_title": "connecting_macos_paragraph_12.2",
+    "next_title": "connecting_macos_paragraph_12.4",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt
new file mode 100644
index 00000000000..2664953ed0c
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt
@@ -0,0 +1,14 @@
+Now we will copy the file to the local machine. On the terminal on your
+own local computer, enter:
+$ scp vsc40000@login.hpc.ugent.be:./docs/intro-HPC--Gent.pdf .
+intro-HPC--Gent.pdf 100% 725KB 724.6KB/s 00:01
+$ ls -l
+total 899
+-rw-r--r-- 1 user staff 741995 Sep 18 09:53 intro-HPC--Gent.pdf
+-rw-r--r-- 1 user staff      6 Sep 18 09:37 localfile.txt
+The file has been copied from the HPC to your local computer.
+It's also possible to copy entire directories (and their contents) with
+the -r flag. For example, if we want to copy the local directory
+dataset to $VSC_SCRATCH, we can use the following command (assuming
+you've created the scratch symlink):
+$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json
new file mode 100644
index 00000000000..757b533cf8d
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-scp",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "previous_title": "connecting_macos_paragraph_12.3",
+    "next_title": "connecting_macos_paragraph_12.5",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt
new file mode 100644
index 00000000000..51d39b548c3
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt
@@ -0,0 +1,14 @@
+If you don't use the -r option to copy a directory, you will run into
+the following error:
+$ scp dataset vsc40000@login.hpc.ugent.be:scratch
+dataset: not a regular file
+Using sftp
+The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file
+transfer and file management functionalities over any reliable data
+stream. It was designed as an extension of the Secure Shell protocol
+(SSH) version 2.0. This protocol assumes that it is run over a secure
+channel, such as SSH, that the server has already authenticated the
+client, and that the identity of the client user is available to the
+protocol.
+The sftp is an equivalent of the ftp command, with the difference that
+it uses the secure ssh protocol to connect to the clusters.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json
new file mode 100644
index 00000000000..d18c7c7deb5
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-sftp",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "previous_title": "connecting_macos_paragraph_12.4",
+    "next_title": "connecting_macos_paragraph_12.6",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt
new file mode 100644
index 00000000000..4ae257101f1
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt
@@ -0,0 +1,18 @@
+One easy way of starting a sftp session is
+$ sftp vsc40000@login.hpc.ugent.be
+Typical and popular commands inside an sftp session are:
+|                       |                                                                                      |
+|:--------------------------|:-------------------------------------------------------------------------------------|
+| cd ~/examples/fibo    | Move to the examples/fibo subdirectory on the HPC (i.e., the remote machine)   |
+| ls                    | Get a list of the files in the current directory on the HPC.                   |
+| get fibo.py           | Copy the file "fibo.py" from the HPC                                           |
+| get tutorial/HPC.pdf  | Copy the file "HPC.pdf" from the HPC, which is in the "tutorial" subdirectory. |
+| lcd test              | Move to the "test" subdirectory on your local machine.                               |
+| lcd ..                | Move up one level in the local directory.                                            |
+| lls                   | Get local directory listing.                                                         |
+| put test.py           | Copy the local file test.py to the HPC.                                        |
+| put test1.py test2.py | Copy the local file test1.py to the HPC and rename it to test2.py.                   |
+| bye                   | Quit the sftp session                                                                |
+| **mget *.cc**             | Copy all the remote files with extension ".cc" to the local directory.               |
+| **mput *.h**              | Copy all the local files with extension ".h" to the HPC.                       |
+|                       |                                                                                      |
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json
new file mode 100644
index 00000000000..a8a4f2a3bab
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-sftp",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "links": {
+        "0": "",
+        "1": ""
+    },
+    "previous_title": "connecting_macos_paragraph_12.5",
+    "next_title": "connecting_macos_paragraph_12.7",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt
new file mode 100644
index 00000000000..c5ed84e9ea0
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt
@@ -0,0 +1,12 @@
+Transfer Files to/from the HPC
+Using a GUI (Cyberduck)
+Cyberduck is a graphical alternative to the scp command. It can be
+installed from <https://cyberduck.io>.
+This is the one-time setup you will need to do before connecting:
+1.  After starting Cyberduck, the Bookmark tab will show up. To add a
+    new bookmark, click on the "+" sign on the bottom left of the
+    window. A new window will open.
+2.  In the drop-down menu on top, select "SFTP (SSH File Transfer Protocol)".
+3.  In the "Server" field, type in login.hpc.ugent.be. In the "Username" field, type in
+    your VSC account id (this looks like vsc40000).
+4.  Select the location of your SSH private key in the "SSH Private Key" field.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json
new file mode 100644
index 00000000000..bd02ed8502f
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-a-GUI-(Cyberduck)",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "previous_title": "connecting_paragraph_12",
+    "next_title": "connecting_macos_paragraph_13.2",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt
new file mode 100644
index 00000000000..d48d0ce00a3
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt
@@ -0,0 +1,6 @@
+5.  Finally, type in a name for the bookmark in the "Nickname" field and
+    close the window by pressing on the red circle in the top left
+    corner of the window.
+To open the connection, click on the "Bookmarks" icon (which
+resembles an open book) and double-click on the bookmark you just
+created.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json
new file mode 100644
index 00000000000..344ff690d54
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-a-GUI-(Cyberduck)",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "previous_title": "connecting_macos_paragraph_13.1",
+    "next_title": "connecting_paragraph_14",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt
new file mode 100644
index 00000000000..773d03f0689
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt
@@ -0,0 +1,7 @@
+First Time connection to the HPC infrastructure
+ssh_exchange_identification: read: Connection reset by peer
+First Time connection to the HPC infrastructure
+The remaining content in this chapter is primarily intended for people using a terminal with SSH.
+If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal.
+If you have any issues connecting to the HPC after you've followed these
+steps, see Issues connecting to login node to troubleshoot.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json
new file mode 100644
index 00000000000..89431f52435
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "connecting",
+    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "title_depth": 2,
+    "directory": "connecting",
+    "parent_title": "Connecting-to-the-HPC-infrastructure",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/web_portal",
+        "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node"
+    },
+    "previous_title": "connecting_paragraph_3",
+    "next_title": "connecting_paragraph_5",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt
new file mode 100644
index 00000000000..d4c89b7e1c7
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt
@@ -0,0 +1,10 @@
+First Time connection to the HPC infrastructure
+Connect
+Open up a terminal and enter the following command to connect to the HPC.
+You can open a terminal by navigating to Applications and then Utilities in the Finder and opening Terminal.app, or by entering Terminal in Spotlight Search.
+$ ssh vsc40000@login.hpc.ugent.be
+Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login
+node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command.
+The first time you make a connection to the login node, you will be
+asked to verify the authenticity of the login node. Please check
+Warning message when first connecting to new host on how to do this.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json
new file mode 100644
index 00000000000..e17629a55f3
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Connect",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host"
+    },
+    "previous_title": "connecting_paragraph_4",
+    "next_title": "connecting_macos_paragraph_5.2",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#connect"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt
new file mode 100644
index 00000000000..6fa418464dd
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt
@@ -0,0 +1,7 @@
+A possible error message you can get if you previously saved your
+private key somewhere else than the default location
+($HOME/.ssh/id_rsa):
+Permission denied (publickey,gssapi-keyex,gssapi-with-mic).
+In this case, use the -i option for the ssh command to specify the
+location of your private key. For example:
+$ ssh -i /home/example/my_keys vsc40000@login.hpc.ugent.be
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json
new file mode 100644
index 00000000000..5c1d808739c
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Connect",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
+    "previous_title": "connecting_macos_paragraph_5.1",
+    "next_title": "connecting_paragraph_6",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#connect"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt
new file mode 100644
index 00000000000..062ea570d96
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt
@@ -0,0 +1,11 @@
+Adding multiple SSH public keys (optional)
+1.  Create a new public/private SSH key pair from Putty. Repeat the
+    process described in
+    section Generate a public/private key pair.
+2.  Go to <https://account.vscentrum.be/django/account/edit>
+3.  Upload the new SSH public key using the Add public key section. Make sure that your
+    public key is actually saved, because a public key will be refused
+    if it is too short, wrong type, or in a wrong format.
+4.  (optional) If you lost your key, you can delete the old key on the
+    same page. You should keep at least one valid public SSH key in your
+    account.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json
new file mode 100644
index 00000000000..eb4dd3b3a57
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "account",
+    "subtitle": "Adding-multiple-SSH-public-keys-(optional)",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Applying-for-the-account",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair"
+    },
+    "previous_title": "account_paragraph_10",
+    "next_title": "account_paragraph_12",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#adding-multiple-ssh-public-keys-optional"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt
new file mode 100644
index 00000000000..2ff8ffc1a08
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt
@@ -0,0 +1,14 @@
+How do SSH keys work
+-   anyone who has the key (and the optional password) can unlock the
+    door and log in to the account.
+-   the door to your VSC account is special: it can have multiple
+    locks (SSH public keys) attached to it, and you only need to open
+    one lock with the corresponding key (SSH private key) to open
+    the door (log in to the account).
+Since all VSC clusters use Linux as their main operating system, you
+will need to get acquainted with using the command-line interface and
+using the terminal (see tutorial).
+A typical Windows environment does not come with pre-installed software
+to connect and run command-line executables on a HPC. Some tools need to be
+installed on your Windows machine first, before we can start the actual
+work.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
new file mode 100644
index 00000000000..08573d26bfe
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "account",
+    "subtitle": "How-do-SSH-keys-work",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "links": {
+        "0": "../../linux-tutorial"
+    },
+    "previous_title": "account_paragraph_3",
+    "next_title": "account_windows_paragraph_4.2",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#how-do-ssh-keys-work"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt
new file mode 100644
index 00000000000..c89b45d8f2b
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt
@@ -0,0 +1,13 @@
+Get PuTTY: A free telnet/SSH client
+We recommend using the PuTTY tools package, which is freely available.
+You do not need to install PuTTY; you can download the PuTTY and
+PuTTYgen executables and run them. This can be useful in situations where
+you do not have the required permissions to install software on the
+computer you are using. Alternatively, an installation package is also
+available.
+You can download PuTTY from the official address:
+<https://www.chiark.greenend.org.uk/~sgtatham/putty/latest.html>. You
+probably want the 64-bit version. If you can install software on your
+computer, you can use the "Package files", if not, you can download and
+use putty.exe and puttygen.exe in the "Alternative binary files"
+section.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json
new file mode 100644
index 00000000000..bedb3d33218
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Get-PuTTY-A-free-telnetSSH-client",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_windows_paragraph_4.1",
+    "next_title": "account_windows_paragraph_4.3",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#get-putty-a-free-telnetssh-client"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt
new file mode 100644
index 00000000000..cebd1da3baf
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt
@@ -0,0 +1,13 @@
+The PuTTY package consists of several components, but we'll only use
+two:
+1. PuTTY: the Telnet and SSH client itself (to login, see Open a terminal)
+2.  PuTTYgen: an RSA and DSA key generation utility (to generate a key pair,
+    see Generate a public/private key pair)
+Generating a public/private key pair
+Before requesting a VSC account, you need to generate a pair of ssh
+keys. You need 2 keys, a public and a private key. You can visualise the
+public key as a lock to which only you have the key (your private key).
+You can send a copy of your lock to anyone without any problems, because
+only you can open it, as long as you keep your private key secure. To
+generate a public/private key pair, you can use the PuTTYgen key
+generator.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json
new file mode 100644
index 00000000000..a8fcacd08a0
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "account",
+    "subtitle": "Generating-a-publicprivate-key-pair",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/account/../connecting/#open-a-terminal",
+        "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair"
+    },
+    "previous_title": "account_windows_paragraph_4.2",
+    "next_title": "account_windows_paragraph_4.4",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt
new file mode 100644
index 00000000000..b7743b0b9ae
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt
@@ -0,0 +1,17 @@
+Start PuTTYgen.exe and follow these steps:
+1.  In "Parameters" (at the bottom of the window), choose "RSA" and set the number of
+    bits in the key to 4096.
+2.  Click on "Generate". To generate the key, you must move the mouse cursor over
+    the PuTTYgen window (this generates some random data that PuTTYgen
+    uses to generate the key pair). Once the key pair is generated, your
+    public key is shown in the field "Public key for pasting into OpenSSH authorized_keys file".
+3.  Next, it is advised to fill in the "Key comment" field to make the key easier
+    to identify afterwards.
+4.  Next, you should specify a passphrase in the "Key passphrase" field and retype it in
+    the "Confirm passphrase" field. Remember, the passphrase protects the private key against
+    unauthorised use, so it is best to choose one that is not too easy
+    to guess but that you can still remember. Using a passphrase is not
+    required, but we recommend you to use a good passphrase unless you
+    are certain that your computer's hard disk is encrypted with a
+    decent password. (If you are not sure your disk is encrypted, it
+    probably isn't.)
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json
new file mode 100644
index 00000000000..5fe2e81aa3d
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Generating-a-publicprivate-key-pair",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_windows_paragraph_4.3",
+    "next_title": "account_windows_paragraph_4.5",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt
new file mode 100644
index 00000000000..2326d87b6d7
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt
@@ -0,0 +1,7 @@
+5.  Save both the public and private keys in a folder on your personal
+    computer (We recommend to create and put them in the folder
+    "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh") with the
+    buttons "Save public key" and "Save private key". We recommend using the name "id_rsa.pub" for the public key, and
+    "id_rsa.ppk" for the private key.
+If you use another program to generate a key pair, please remember that
+they need to be in the OpenSSH format to access the HPC clusters.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json
new file mode 100644
index 00000000000..79c584a8f41
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Generating-a-publicprivate-key-pair",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_windows_paragraph_4.4",
+    "next_title": "account_paragraph_5",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt
new file mode 100644
index 00000000000..b8dba743c0a
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt
@@ -0,0 +1,13 @@
+Using an SSH agent (optional)
+It is possible to set up an SSH agent in Windows. This is an optional
+configuration that helps you keep all your SSH keys (if you have
+several) stored in the same key ring, so you do not have to type the SSH key
+password each time. The SSH agent is also necessary to enable SSH hops
+with key forwarding from Windows.
+Pageant is the SSH authentication agent used in Windows. This agent should be
+available from the PuTTY installation package
+<https://www.chiark.greenend.org.uk/~sgtatham/putty/latest.html> or as a
+standalone binary package.
+After the installation, just start the Pageant application in Windows;
+this will start the agent in the background. The agent icon will be visible
+from the Windows panel.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json
new file mode 100644
index 00000000000..69771b48c86
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Using-an-SSH-agent-(optional)",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_paragraph_5",
+    "next_title": "account_windows_paragraph_6.2",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt
new file mode 100644
index 00000000000..62ac04dd9aa
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt
@@ -0,0 +1,11 @@
+At this point the agent does not contain any private key. You should
+include the private key(s) generated in the previous section Generating a public/private key pair.
+1.  Click on "Add key"
+2.  Select the private key file generated in Generating a public/private key pair ("id_rsa.ppk" by default).
+3.  Enter the same SSH key password used to generate the key. After this
+    step the new key will be included in Pageant to manage the SSH
+    connections.
+4.  You can see the SSH key(s) available in the key ring by clicking
+    on "View Keys".
+5.  You can change PuTTY setup to use the SSH agent. Open PuTTY and check
+    Connection > SSH > Auth > Allow agent forwarding.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json
new file mode 100644
index 00000000000..24670708070
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "account",
+    "subtitle": "Using-an-SSH-agent-(optional)",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair",
+        "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair"
+    },
+    "previous_title": "account_windows_paragraph_6.1",
+    "next_title": "account_windows_paragraph_6.3",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt
new file mode 100644
index 00000000000..17c94975dec
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt
@@ -0,0 +1,5 @@
+Now you can connect to the login nodes as usual. The SSH agent will know
+which SSH key should be used and you do not have to type the SSH
+passwords each time; this task is done by the Pageant agent automatically.
+It is also possible to use WinSCP with Pageant, see
+<https://winscp.net/eng/docs/ui_pageant> for more details.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json
new file mode 100644
index 00000000000..d47ad3bd215
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Using-an-SSH-agent-(optional)",
+    "title_depth": 3,
+    "directory": "account",
+    "parent_title": "Getting-ready-to-request-an-account",
+    "previous_title": "account_windows_paragraph_6.2",
+    "next_title": "account_paragraph_7",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt
new file mode 100644
index 00000000000..90c17263cf5
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt
@@ -0,0 +1,7 @@
+Applying for the account
+This file should have been stored in the directory
+"C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh"
+After you have uploaded your public key you will receive an e-mail with
+a link to confirm your e-mail address. After confirming your e-mail
+address the VSC staff will review and if applicable approve your
+account.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json
new file mode 100644
index 00000000000..d01ac9c3c16
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "account",
+    "subtitle": "Applying-for-the-account",
+    "title_depth": 2,
+    "directory": "account",
+    "parent_title": "account",
+    "previous_title": "account_paragraph_8",
+    "next_title": "account_paragraph_10",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#applying-for-the-account"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt
new file mode 100644
index 00000000000..aaf5a585ebd
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt
@@ -0,0 +1,5 @@
+First Time connection to the HPC infrastructure
+    A locale is a set of parameters that defines the user's language, country and
+    any special variant preferences that the user wants to see in their user
+    interface. Usually a locale identifier consists of at least a language
+    identifier and a region identifier.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json
new file mode 100644
index 00000000000..45c2bd2d90e
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "title_depth": 2,
+    "directory": "connecting",
+    "parent_title": "Connecting-to-the-HPC-infrastructure",
+    "previous_title": "connecting_paragraph_9",
+    "next_title": "connecting_paragraph_11",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#first-time-connection-to-the-hpc-infrastructure"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt
new file mode 100644
index 00000000000..b43909c15c4
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt
@@ -0,0 +1,11 @@
+Transfer Files to/from the HPC
+Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back.
+WinSCP
+To transfer files to and from the cluster, we recommend the use of
+WinSCP, a graphical file management tool which can transfer files using
+secure protocols such as SFTP and SCP. WinSCP is freely available from
+<http://www.winscp.net>.
+To transfer your files using WinSCP,
+1.  Open the program
+2.  The "Login" menu is shown automatically (if it is closed, click "New Session" to open it again). Fill in the necessary fields under "Session"
+    1.  Click "New Site".
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json
new file mode 100644
index 00000000000..d9fbc64790a
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "WinSCP",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
+    "previous_title": "connecting_paragraph_10",
+    "next_title": "connecting_windows_paragraph_11.2",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt
new file mode 100644
index 00000000000..642bb4e34b6
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt
@@ -0,0 +1,19 @@
+    2.  Enter "login.hpc.ugent.be" in the "Host name" field.
+    3.  Enter your "vsc-account" in the "User name" field.
+    4.  Select "SCP" as the "file" protocol.
+    5.  Note that the password field remains empty.
+    6.  Click "Advanced...".
+    7.  Click "SSH > Authentication".
+    8.  Select your private key in the field "Private key file".
+3.  Press the "Save" button, to save the session under "Session > Sites" for future access.
+4.  Finally, when clicking on "Login", you will be asked for your key passphrase.
+The first time you make a connection to the login node, a Security
+Alert will appear and you will be asked to verify the authenticity of the
+login node.
+Make sure the fingerprint in the alert matches one of the following:
+- ssh-rsa 2048 10:2f:31:21:04:75:cb:ed:67:e0:d5:0c:a1:5a:f4:78
+- ssh-rsa 2048 SHA256:W8Wz0/FkkCR2ulN7+w8tNI9M0viRgFr2YlHrhKD2Dd0
+- ssh-ed25519 255 19:28:76:94:52:9d:ff:7d:fb:8b:27:b6:d7:69:42:eb
+- ssh-ed25519 256 SHA256:8AJg3lPN27y6i+um7rFx3xoy42U8ZgqNe4LsEycHILA
+- ssh-ecdsa 256 e6:d2:9c:d8:e7:59:45:03:4a:1f:dc:96:62:29:9c:5f
+- ssh-ecdsa 256 SHA256:C8TVx0w8UjGgCQfCmEUaOPxJGNMqv2PXLyBNODe5eOQ
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json
new file mode 100644
index 00000000000..65055dc0764
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "WinSCP",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
+    "previous_title": "connecting_windows_paragraph_11.1",
+    "next_title": "connecting_windows_paragraph_11.3",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt
new file mode 100644
index 00000000000..b52c614f263
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt
@@ -0,0 +1,7 @@
+If it does, press Yes, if it doesn't, please contact hpc@ugent.be.
+Note: it is possible that the ssh-ed25519 fingerprint starts with ssh-ed25519 255
+rather than ssh-ed25519 256 (or vice versa), depending on the PuTTY version you are using.
+It is safe to ignore this 255 versus 256 difference, but the part after should be
+identical.
+Now, try out whether you can transfer an arbitrary file from your local
+machine to the HPC and back.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json
new file mode 100644
index 00000000000..dd628f8e8cd
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "WinSCP",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
+    "previous_title": "connecting_windows_paragraph_11.2",
+    "next_title": "connecting_paragraph_12",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt
new file mode 100644
index 00000000000..14f191fe61a
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt
@@ -0,0 +1,11 @@
+First Time connection to the HPC infrastructure
+ssh_exchange_identification: read: Connection reset by peer
+First Time connection to the HPC infrastructure
+The remaining content in this chapter is primarily intended for people using a terminal with SSH.
+If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal.
+If you have any issues connecting to the HPC after you've followed these
+steps, see Issues connecting to login node to troubleshoot.
+Open a Terminal
+You've generated a public/private key pair with PuTTYgen and have an
+approved account on the VSC clusters. The next step is to set up the
+connection to (one of) the HPC.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json
new file mode 100644
index 00000000000..24d4df9e248
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Open-a-Terminal",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Connection-restrictions",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/web_portal",
+        "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node"
+    },
+    "previous_title": "connecting_paragraph_3",
+    "next_title": "connecting_windows_paragraph_4.2",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt
new file mode 100644
index 00000000000..e481b47bc2b
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt
@@ -0,0 +1,13 @@
+In the screenshots, we show the setup for a user connecting
+to the HPC cluster via the login node "login.hpc.ugent.be".
+1.  Start the PuTTY executable putty.exe in your directory
+    C:\Program Files (x86)\PuTTY and the configuration screen will pop
+    up. As you will often use the PuTTY tool, we recommend adding a
+    shortcut on your desktop.
+2.  Within the category <Session\>, in the field <Host Name\>, enter the name of the
+    login node of the cluster (i.e., "login.hpc.ugent.be") you want to connect to.
+    
+3.  In the category "Connection > Data", in the field "Auto-login username", put in <vsc40000\> , which is your VSC
+    username that you have received by e-mail after your request was
+    approved.
+    
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json
new file mode 100644
index 00000000000..a783f797fdb
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Open-a-Terminal",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Connection-restrictions",
+    "previous_title": "connecting_windows_paragraph_4.1",
+    "next_title": "connecting_windows_paragraph_4.3",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt
new file mode 100644
index 00000000000..fbd5e76f278
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt
@@ -0,0 +1,13 @@
+4.  In the category "Connection > SSH > Auth", in the field "Private key file for authentication" click on "Browse" and select the private key
+    (i.e., "id_rsa.ppk") that you generated and saved above.
+5.  In the category "Connection > SSH > X11", click the "Enable X11 Forwarding" checkbox.
+6.  Now go back to <Session\>, and fill in "hpcugent" in the "Saved Sessions" field and press "Save" to
+    store the session information.
+    
+7.  Now pressing "Open" will open a terminal window and ask for your
+    passphrase.
+    
+8.  If this is your first time connecting, you will be asked to verify
+    the authenticity of the login node. Please see
+    section Warning message when first connecting to new host
+    on how to do this.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json
new file mode 100644
index 00000000000..9da459060af
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Open-a-Terminal",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Connection-restrictions",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host"
+    },
+    "previous_title": "connecting_windows_paragraph_4.2",
+    "next_title": "connecting_windows_paragraph_4.4",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt
new file mode 100644
index 00000000000..f4a1302750b
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt
@@ -0,0 +1,11 @@
+9.  After entering your correct passphrase, you will be connected to the
+    login-node of the HPC.
+10. To check you can now "Print the Working Directory" (pwd) and check
+    the name of the computer, where you have logged in (hostname):
+    $ pwd
+    /user/home/gent/vsc400/vsc40000
+    $ hostname -f
+    gligar07.gastly.os
+    
+11. For future PuTTY sessions, just select your saved session (i.e. "hpcugent")
+    from the list, "Load" it and press "Open".
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json
new file mode 100644
index 00000000000..83127a292f8
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Open-a-Terminal",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Connection-restrictions",
+    "previous_title": "connecting_windows_paragraph_4.3",
+    "next_title": "connecting_paragraph_5",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal"
+}
\ No newline at end of file

From 692e77b51b5756859398d992293aa49d6cb4d527 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 15:06:45 +0200
Subject: [PATCH 124/152] fix html links: use LINK_MARKER instead of brackets

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 1530eedf31c..9e5baba82f3 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -176,7 +176,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
             # add references for every link of format <a href=...>
             if re.search(r'a href=.*', content):
                 link = content[8:-1]
-                curr_line = re.sub(f'<{content}>', "[" + str(len(linklist) + 1) + "]", curr_line)
+                curr_line = re.sub(f'<{content}>', LINK_MARKER + str(len(linklist) + 1) + LINK_MARKER, curr_line)
                 linklist.append(link)
 
             # drop the syntax words

From 7f493a19d3265c4634267075958a7d4caf966e8c Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 15:08:23 +0200
Subject: [PATCH 125/152] fix html links: correct href slice and zero-based index

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 9e5baba82f3..690385a95d4 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -175,8 +175,8 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
 
             # add references for every link of format <a href=...>
             if re.search(r'a href=.*', content):
-                link = content[8:-1]
-                curr_line = re.sub(f'<{content}>', LINK_MARKER + str(len(linklist) + 1) + LINK_MARKER, curr_line)
+                link = content[7:]
+                curr_line = re.sub(f'<{content}>', LINK_MARKER + str(len(linklist)) + LINK_MARKER, curr_line)
                 linklist.append(link)
 
             # drop the syntax words

From 0e34396f77b0112171d8a30df36cd76c14fbac4d Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 15:30:41 +0200
Subject: [PATCH 126/152] fix for issue with relative links to the same
 document

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 690385a95d4..986ec2ef65b 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -158,7 +158,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
                 if "#" not in match[1]:
                     linklist.append(match[1])
                 else:
-                    linklist.append(DOCS_URL + "/" + main_title + "/" + match[1])
+                    linklist.append(DOCS_URL + "/" + main_title.replace(".md", "") + "/" + match[1])
             else:
                 linklist.append(DOCS_URL + "/" + match[1].replace(".md", "/").replace("index", "").rstrip("/"))
 

From fa0004482a3326f8385502b85e5c4ed9b4bf5410 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 15:31:33 +0200
Subject: [PATCH 127/152] added test for replace_markdown_markers

---
 .../tests/test_replace_markdown_markers.py    | 46 +++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py
new file mode 100644
index 00000000000..f4cee6dd75c
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py
@@ -0,0 +1,46 @@
+import pytest
+from chatbot_parser import replace_markdown_markers
+
+
+@pytest.mark.parametrize("input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist", [
+    # baseline test
+    ("A normal line with nothing special", [], False, "", "A normal line with nothing special", []),
+    # image 1
+    ("![image](a-nice-image.png)", [], False, "", "", []),
+    # image 2
+    ("![](img/Look-at-this-photograph.png)", [], False, "", "", []),
+    # link 1 (outside docs)
+    ("A line with a [link](a-nice-link.com)", ["another-link.be"], False, "",
+     "A line with a link§link§link§1§link§link§", ["another-link.be", "a-nice-link.com"]),
+    # link 2 (another document within the docs)
+    ("A line with a [link to the docs](account.md#welcome-e-mail)", ["another-link.be"], False, "",
+     "A line with a link to the docs§link§link§1§link§link§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]),
+    # link 3 (the same document)
+    ("A line with a [link to the same doc](#welcome-e-mail)", ["another-link.be"], False, "account.md",
+     "A line with a link to the same doc§link§link§1§link§link§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]),
+    # codeblock
+    ("```shell", [], True, "", "", []),
+    # html syntax 1 (normal syntax)
+    ("A line with something in <b>Bold</b>", [], False, "", "A line with something in Bold", []),
+    # html syntax 2 (link)
+    ("A line with another link<a href=website.com>", ["other-website.com"], False, "",
+     "A line with another link§link§link§1§link§link§", ["other-website.com", "website.com"]),
+    # html syntax 3 (style)
+    ("<p style='text-align: center'>A line with style</p>", [], False, "", "A line with style", []),
+    # Bot comment
+    ("<!--INPUT_FOR_BOTSomething about the following table-->", [], False, "", "Something about the following table", []),
+    # non-Bot comment
+    ("<!--Something else about the following table-->", [], False, "", "", []),
+    # something else with <>
+    ("A line with an example where you should put <your own input>", [], False, "", "A line with an example where you should put <your own input>", []),
+    # info/tips/warnings
+    ("!!! warning", [], False, "", " warning", []),
+    # collapsable admonitions
+    ("??? note", [], False, "", " note", []),
+    # Markdown syntax 1 (not in code block)
+    ("`Line` **with** ++a++ _lot_ *of* _++markdown++_ `syntax`", [], False, "", "Line with a lot of markdown syntax", []),
+    # Markdown syntax 2 (in code block)
+    ("`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", [], True, "", "`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", [])
+])
+def test_replace_markdown_markers(input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist):
+    assert replace_markdown_markers(input_line, input_linklist, in_code_block, main_title) == (expected_line, expected_linklist)

From b3952b2e769483bc1a6dc7c146b847f7519843a3 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 16:22:41 +0200
Subject: [PATCH 128/152] fix small inconsistency in metadata: use string keys for links

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 986ec2ef65b..371ee52e6cd 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -805,7 +805,7 @@ def insert_links(text, links, options):
 
     for link_number in links.keys():
         if link_number in present_links:
-            new_links[len(new_links.keys())] = links[link_number]
+            new_links[str(len(new_links.keys()))] = links[link_number]
 
     return text, new_links
 

From 73072bf2cd57e28b8dafbb2e88ef30f52c95958e Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Tue, 27 Aug 2024 16:26:41 +0200
Subject: [PATCH 129/152] added test for insert_links

---
 .../tests/test_insert_links.py                | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py b/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py
new file mode 100644
index 00000000000..9109f2518ad
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py
@@ -0,0 +1,31 @@
+import pytest
+from chatbot_parser import insert_links
+
+options_include = {"INCLUDE_LINKS_IN_PLAINTEXT": True}
+options_leave_out = {"INCLUDE_LINKS_IN_PLAINTEXT": False}
+links_input = {"0": "https://first_link.com", "1": "https://second_link.be", "2": "https://docs.hpc.ugent.be/account#welcome-e-mail", "3": "https://final-link.org"}
+
+
+@pytest.mark.parametrize("text_input, options_input, text_output, new_links", [
+    # Text without links
+    # don't include links
+    ("Text without links\nand with two lines.", options_leave_out, "Text without links\nand with two lines.", {}),
+    # include links
+    ("Text without links\nand with two lines.", options_include, "Text without links\nand with two lines.", {}),
+    # Text with all links
+    # don't include links
+    ("Text with all the links\nand with multiple lines.\n§link§link§0§link§link§\n§link§link§1§link§link§\n§link§link§2§link§link§\n§link§link§3§link§link§", options_leave_out,
+     "Text with all the links\nand with multiple lines.\n\n\n\n", links_input),
+    # include links
+    ("Text with all the links\nand with multiple lines.\n§link§link§0§link§link§\n§link§link§1§link§link§\n§link§link§2§link§link§\n§link§link§3§link§link§", options_include,
+     "Text with all the links\nand with multiple lines.\n https://first_link.com \n https://second_link.be \n https://docs.hpc.ugent.be/account#welcome-e-mail \n https://final-link.org ", links_input),
+    # Text with some links
+    # don't include links
+    ("Text with all the links\nand with multiple lines.\n§link§link§1§link§link§\n§link§link§3§link§link§", options_leave_out,
+     "Text with all the links\nand with multiple lines.\n\n", {"0": "https://second_link.be", "1": "https://final-link.org"}),
+    # include links
+    ("Text with all the links\nand with multiple lines.\n§link§link§0§link§link§\n§link§link§2§link§link§", options_include,
+     "Text with all the links\nand with multiple lines.\n https://first_link.com \n https://docs.hpc.ugent.be/account#welcome-e-mail ", {"0": "https://first_link.com", "1": "https://docs.hpc.ugent.be/account#welcome-e-mail"})
+])
+def test_insert_links(text_input, options_input, text_output, new_links):
+    assert insert_links(text_input, links_input, options_input) == (text_output, new_links)

From 31613094e7e8fd60e74a0d639fcb28b08d262e65 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 28 Aug 2024 13:16:04 +0200
Subject: [PATCH 130/152] make sure paragraphs only include full lists

---
 .../chatbot_parser.py                         | 31 +++++++++++--
 .../generic/account/account_paragraph_12.txt  |  3 --
 .../generic/account/account_paragraph_3.txt   |  6 +++
 .../connecting/connecting_paragraph_1.txt     |  5 +++
 .../connecting/connecting_paragraph_2.txt     | 14 +++---
 .../connecting/connecting_paragraph_3.txt     | 16 +++----
 .../connecting_paragraph_3_metadata.json      |  8 +++-
 .../account/account_linux_paragraph_11.1.txt  |  5 ++-
 .../account/account_linux_paragraph_4.1.txt   |  6 ---
 .../account/account_linux_paragraph_5.2.txt   |  1 +
 .../account/account_linux_paragraph_5.3.txt   |  1 -
 .../connecting_linux_paragraph_4.1.txt        |  7 ---
 ...nnecting_linux_paragraph_4.1_metadata.json | 15 -------
 .../account/account_macos_paragraph_11.1.txt  |  5 ++-
 .../account/account_macos_paragraph_4.1.txt   |  6 ---
 .../connecting_macos_paragraph_13.1.txt       |  3 ++
 .../connecting_macos_paragraph_13.2.txt       |  3 --
 .../connecting_macos_paragraph_4.1.txt        |  7 ---
 ...nnecting_macos_paragraph_4.1_metadata.json | 15 -------
 .../account_windows_paragraph_11.1.txt        |  5 ++-
 .../account/account_windows_paragraph_4.1.txt | 13 +++---
 ...ccount_windows_paragraph_4.1_metadata.json |  4 +-
 .../account/account_windows_paragraph_4.2.txt | 13 +++---
 ...ccount_windows_paragraph_4.2_metadata.json |  8 +++-
 .../account/account_windows_paragraph_4.3.txt | 28 +++++++++---
 ...ccount_windows_paragraph_4.3_metadata.json |  4 --
 .../account/account_windows_paragraph_4.4.txt | 19 +-------
 ...ccount_windows_paragraph_4.4_metadata.json |  2 +-
 .../account/account_windows_paragraph_4.5.txt |  7 ---
 ...ccount_windows_paragraph_4.5_metadata.json | 11 -----
 .../connecting_windows_paragraph_11.1.txt     |  9 ++++
 .../connecting_windows_paragraph_11.2.txt     | 10 +----
 .../connecting_windows_paragraph_11.3.txt     |  1 -
 .../connecting_windows_paragraph_4.1.txt      | 43 ++++++++++++++++---
 ...ecting_windows_paragraph_4.1_metadata.json |  7 ++-
 .../connecting_windows_paragraph_4.2.txt      | 13 ------
 ...ecting_windows_paragraph_4.2_metadata.json | 11 -----
 .../connecting_windows_paragraph_4.3.txt      | 13 ------
 ...ecting_windows_paragraph_4.3_metadata.json | 14 ------
 .../connecting_windows_paragraph_4.4.txt      | 11 -----
 ...ecting_windows_paragraph_4.4_metadata.json | 11 -----
 41 files changed, 172 insertions(+), 242 deletions(-)
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 371ee52e6cd..6ec1aa0e9b2 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -7,7 +7,7 @@
 import re
 import shutil
 import yaml
-from itertools import chain
+from itertools import chain, tee, zip_longest
 from pathlib import Path
 from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template
 
@@ -402,6 +402,9 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1,
     # variable to indicate that previous section was one with if-statements
     previous_contained_if = False
 
+    # variable to indicate that the previous line was part of a list
+    in_list = False
+
     # paragraph number to add to title
     paragraph_number = 1
 
@@ -410,7 +413,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1,
 
     # TODO: define metadata data if split occurs on paragraphs and last_title and title_level are known (placeholder in place right now)
     if current_paragraph_number != -1:
-        last_title_level = 5
+        last_title_level = 4
         last_dir = "PLACEHOLDER"
 
     # list to keep track of most recent directories on each title level
@@ -418,12 +421,32 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1,
 
     with open(file, 'r') as readfile:
 
-        for line in readfile:
+        # Create two independent iterators from the original file iterator (needed to check for lists)
+        current_line, next_line = tee(readfile)
+
+        # Advance the next_line iterator by one step, so it is always one step ahead
+        next(next_line, None)
+
+        # Process the lines
+        for line, nxt in zip_longest(current_line, next_line, fillvalue=""):
 
             # detect if-statements starting or ending on the current line
             in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len(
                 re.findall(IF_MANGLED_PATTERNS[ENDIF], line))
 
+            # detect whether the current line is in a list
+            if re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', line):  # beginning of a list entry
+                in_list = True
+            elif re.search(r'^\s{2,}.+$', line) and in_list:  # middle of a list entry
+                pass
+            elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list:  # line(s) between list entries
+                pass
+            else:
+                in_list = False
+
+            if in_list:
+                print(line[:-1])
+
             # only split up if current line is in a fully non-os-specific section
             if in_if_statement == 0:
 
@@ -434,7 +457,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1,
                     in_code_block = not in_code_block
 
                 # check whether a new paragraph should be started
-                if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= options[MIN_PARAGRAPH_LENGTH] and not in_code_block:
+                if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= options[MIN_PARAGRAPH_LENGTH] and not in_code_block and not in_list:
 
                     # create a title for the previous paragraph
                     if current_paragraph_number == -1:
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt
index 6ee6880838e..7ecd78e5c9f 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt
@@ -1,6 +1,3 @@
-5.  Take into account that it will take some time before the new SSH
-    public key is active in your account on the system; waiting for
-    15-30 minutes should be sufficient.
 Computation Workflow on the HPC
 A typical Computation workflow will be:
 1.  Connect to the HPC
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt
index 9632ef1f5af..e4946869273 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt
@@ -9,3 +9,9 @@ How do SSH keys work?
     VSC and they put it on the door that gives access to your account.
 -   the SSH private key is like a physical key: you don't hand it out
     to other people.
+-   anyone who has the key (and the optional password) can unlock the
+    door and log in to the account.
+-   the door to your VSC account is special: it can have multiple
+    locks (SSH public keys) attached to it, and you only need to open
+    one lock with the corresponding key (SSH private key) to open
+    the door (log in to the account).
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt
index b144712c9df..bc5a1f80140 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt
@@ -11,3 +11,8 @@ you need to do or know:
 3.  Optionally, if you wish to use programs with a **graphical user
     interface**, you will need an X-server on your client system and log
     in to the login nodes with X-forwarding enabled.
+4.  Often several versions of software packages and libraries are
+    installed, so you need to select the ones you need. To manage
+    different versions efficiently, the VSC clusters use so-called
+    modules, so you will need to select and load the modules that
+    you need.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt
index 4c1d879b954..b150c8fbb28 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt
@@ -1,8 +1,3 @@
-4.  Often several versions of software packages and libraries are
-    installed, so you need to select the ones you need. To manage
-    different versions efficiently, the VSC clusters use so-called
-    modules, so you will need to select and load the modules that
-    you need.
 Connection restrictions
 Since March 20th 2020, restrictions are in place that limit from where
 you can connect to the VSC HPC infrastructure, in response to security
@@ -12,3 +7,12 @@ networks, and from (most) Belgian commercial internet providers.
 All other IP domains are blocked by default. If you are connecting from
 an IP address that is not allowed direct access, you have the following
 options to get access to VSC login nodes:
+-   Use an VPN connection to connect to UGent the network (recommended).
+-   Whitelist your IP address automatically by accessing
+    <https://firewall.vscentrum.be> and log in with your UGent account.
+    -   While this web connection is active new SSH sessions can be
+        started.
+    -   Active SSH sessions will remain active even when this web page
+        is closed.
+-   Contact your HPC support team (via hpc@ugent.be) and ask them to whitelist your
+    IP range (e.g., for industry access, automated processes).
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt
index 668a1e6df57..31dd6463266 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt
@@ -1,12 +1,10 @@
--   Use an VPN connection to connect to UGent the network (recommended).
--   Whitelist your IP address automatically by accessing
-    <https://firewall.vscentrum.be> and log in with your UGent account.
-    -   While this web connection is active new SSH sessions can be
-        started.
-    -   Active SSH sessions will remain active even when this web page
-        is closed.
--   Contact your HPC support team (via hpc@ugent.be) and ask them to whitelist your
-    IP range (e.g., for industry access, automated processes).
 Trying to establish an SSH connection from an IP address that does not
 adhere to these restrictions will result in an immediate failure to
 connect, with an error message like:
+ssh_exchange_identification: read: Connection reset by peer
+
+First Time connection to the HPC infrastructure
+The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH.
+If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal.
+If you have any issues connecting to the HPC after you've followed these
+steps, see Issues connecting to login node to troubleshoot.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json
index 4dc75d7dcf3..471e6bfcbf2 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json
@@ -1,11 +1,15 @@
 {
     "main_title": "connecting",
-    "subtitle": "Connection-restrictions",
+    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
     "title_depth": 2,
     "directory": "connecting",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/web_portal",
+        "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node"
+    },
     "parent_title": "",
     "previous_title": "connecting_paragraph_2",
     "next_title": "connecting_paragraph_4",
     "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/connecting/#connection-restrictions"
+    "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt
index 3a46897bdee..b2734cc9f89 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt
@@ -8,4 +8,7 @@ Adding multiple SSH public keys (optional)
     if it is too short, wrong type, or in a wrong format.
 4.  (optional) If you lost your key, you can delete the old key on the
     same page. You should keep at least one valid public SSH key in your
-    account.
\ No newline at end of file
+    account.
+5.  Take into account that it will take some time before the new SSH
+    public key is active in your account on the system; waiting for
+    15-30 minutes should be sufficient.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt
index 1395e2ee7bd..3a282a73a15 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt
@@ -1,10 +1,4 @@
 How do SSH keys work
--   anyone who has the key (and the optional password) can unlock the
-    door and log in to the account.
--   the door to your VSC account is special: it can have multiple
-    locks (SSH public keys) attached to it, and you only need to open
-    one lock with the corresponding key (SSH private key) to open
-    the door (log in to the account).
 Since all VSC clusters use Linux as their main operating system, you
 will need to get acquainted with using the command-line interface and
 using the terminal (see tutorial).
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt
index a166dd14503..318f913fba3 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt
@@ -11,3 +11,4 @@ following commands:
 1.  ssh-keygen: to generate the SSH key pair (public + private key);
 2.  ssh: to open a shell on a remote machine;
 3.  sftp: a secure equivalent of ftp;
+4.  scp: a secure equivalent of the remote copy command rcp.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt
index 2e8fe9e3a24..9d84f459724 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt
@@ -1,4 +1,3 @@
-4.  scp: a secure equivalent of the remote copy command rcp.
 Generate a public/private key pair with OpenSSH
 A key pair might already be present in the default location inside your
 home directory. Therefore, we first check if a key is available with the
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt
deleted file mode 100644
index 773d03f0689..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-First Time connection to the HPC infrastructure
-ssh_exchange_identification: read: Connection reset by peer
-First Time connection to the HPC infrastructure
-The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH.
-If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal.
-If you have any issues connecting to the HPC after you've followed these
-steps, see Issues connecting to login node to troubleshoot.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json
deleted file mode 100644
index f6745fc31dc..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
-    "title_depth": 2,
-    "directory": "connecting",
-    "parent_title": "Connecting-to-the-HPC-infrastructure",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/web_portal",
-        "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node"
-    },
-    "previous_title": "connecting_paragraph_3",
-    "next_title": "connecting_paragraph_5",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt
index 3a46897bdee..b2734cc9f89 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt
@@ -8,4 +8,7 @@ Adding multiple SSH public keys (optional)
     if it is too short, wrong type, or in a wrong format.
 4.  (optional) If you lost your key, you can delete the old key on the
     same page. You should keep at least one valid public SSH key in your
-    account.
\ No newline at end of file
+    account.
+5.  Take into account that it will take some time before the new SSH
+    public key is active in your account on the system; waiting for
+    15-30 minutes should be sufficient.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt
index 1395e2ee7bd..3a282a73a15 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt
@@ -1,10 +1,4 @@
 How do SSH keys work
--   anyone who has the key (and the optional password) can unlock the
-    door and log in to the account.
--   the door to your VSC account is special: it can have multiple
-    locks (SSH public keys) attached to it, and you only need to open
-    one lock with the corresponding key (SSH private key) to open
-    the door (log in to the account).
 Since all VSC clusters use Linux as their main operating system, you
 will need to get acquainted with using the command-line interface and
 using the terminal (see tutorial).
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt
index c5ed84e9ea0..20a4acb40a8 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt
@@ -10,3 +10,6 @@ This is the one-time setup you will need to do before connecting:
 3.  In the "Server" field, type in login.hpc.ugent.be. In the "Username" field, type in
     your VSC account id (this looks like vsc40000).
 4.  Select the location of your SSH private key in the "SSH Private Key" field.
+5.  Finally, type in a name for the bookmark in the "Nickname" field and
+    close the window by pressing on the red circle in the top left
+    corner of the window.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt
index d48d0ce00a3..1d20edf411f 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt
@@ -1,6 +1,3 @@
-5.  Finally, type in a name for the bookmark in the "Nickname" field and
-    close the window by pressing on the red circle in the top left
-    corner of the window.
 To open the connection, click on the "Bookmarks" icon (which
 resembles an open book) and double-click on the bookmark you just
 created.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt
deleted file mode 100644
index 773d03f0689..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-First Time connection to the HPC infrastructure
-ssh_exchange_identification: read: Connection reset by peer
-First Time connection to the HPC infrastructure
-The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH.
-If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal.
-If you have any issues connecting to the HPC after you've followed these
-steps, see Issues connecting to login node to troubleshoot.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json
deleted file mode 100644
index 89431f52435..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
-    "title_depth": 2,
-    "directory": "connecting",
-    "parent_title": "Connecting-to-the-HPC-infrastructure",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/web_portal",
-        "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node"
-    },
-    "previous_title": "connecting_paragraph_3",
-    "next_title": "connecting_paragraph_5",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt
index 062ea570d96..0863009f290 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt
@@ -8,4 +8,7 @@ Adding multiple SSH public keys (optional)
     if it is too short, wrong type, or in a wrong format.
 4.  (optional) If you lost your key, you can delete the old key on the
     same page. You should keep at least one valid public SSH key in your
-    account.
\ No newline at end of file
+    account.
+5.  Take into account that it will take some time before the new SSH
+    public key is active in your account on the system; waiting for
+    15-30 minutes should be sufficient.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt
index 2ff8ffc1a08..1e70493305f 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt
@@ -1,10 +1,4 @@
 How do SSH keys work
--   anyone who has the key (and the optional password) can unlock the
-    door and log in to the account.
--   the door to your VSC account is special: it can have multiple
-    locks (SSH public keys) attached to it, and you only need to open
-    one lock with the corresponding key (SSH private key) to open
-    the door (log in to the account).
 Since all VSC clusters use Linux as their main operating system, you
 will need to get acquainted with using the command-line interface and
 using the terminal (see tutorial).
@@ -12,3 +6,10 @@ A typical Windows environment does not come with pre-installed software
 to connect and run command-line executables on a HPC. Some tools need to be
 installed on your Windows machine first, before we can start the actual
 work.
+Get PuTTY: A free telnet/SSH client
+We recommend to use the PuTTY tools package, which is freely available.
+You do not need to install PuTTY, you can download the PuTTY and
+PuTTYgen executable and run it. This can be useful in situations where
+you do not have the required permissions to install software on the
+computer you are using. Alternatively, an installation package is also
+available.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
index 08573d26bfe..ce74735c538 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
@@ -1,6 +1,6 @@
 {
     "main_title": "account",
-    "subtitle": "How-do-SSH-keys-work",
+    "subtitle": "Get-PuTTY-A-free-telnetSSH-client",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
@@ -10,5 +10,5 @@
     "previous_title": "account_paragraph_3",
     "next_title": "account_windows_paragraph_4.2",
     "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#how-do-ssh-keys-work"
+    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#get-putty-a-free-telnetssh-client"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt
index c89b45d8f2b..1a30a219fec 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt
@@ -1,13 +1,12 @@
-Get PuTTY: A free telnet/SSH client
-We recommend to use the PuTTY tools package, which is freely available.
-You do not need to install PuTTY, you can download the PuTTY and
-PuTTYgen executable and run it. This can be useful in situations where
-you do not have the required permissions to install software on the
-computer you are using. Alternatively, an installation package is also
-available.
 You can download PuTTY from the official address:
 <https://www.chiark.greenend.org.uk/~sgtatham/putty/latest.html>. You
 probably want the 64-bits version. If you can install software on your
 computer, you can use the "Package files", if not, you can download and
 use putty.exe and puttygen.exe in the "Alternative binary files"
 section.
+The PuTTY package consists of several components, but we'll only use
+two:
+1. PuTTY: the Telnet and SSH client itself (to login, see Open a terminal)
+2.  PuTTYgen: an RSA and DSA key generation utility (to generate a key pair,
+    see Generate a public/private key pair)
+Generating a public/private key pair
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json
index bedb3d33218..9616b41452a 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json
@@ -1,11 +1,15 @@
 {
     "main_title": "account",
-    "subtitle": "Get-PuTTY-A-free-telnetSSH-client",
+    "subtitle": "Generating-a-publicprivate-key-pair",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/account/../connecting/#open-a-terminal",
+        "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair"
+    },
     "previous_title": "account_windows_paragraph_4.1",
     "next_title": "account_windows_paragraph_4.3",
     "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#get-putty-a-free-telnetssh-client"
+    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt
index cebd1da3baf..de5d164bb7a 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt
@@ -1,9 +1,3 @@
-The PuTTY package consists of several components, but we'll only use
-two:
-1. PuTTY: the Telnet and SSH client itself (to login, see Open a terminal)
-2.  PuTTYgen: an RSA and DSA key generation utility (to generate a key pair,
-    see Generate a public/private key pair)
-Generating a public/private key pair
 Before requesting a VSC account, you need to generate a pair of ssh
 keys. You need 2 keys, a public and a private key. You can visualise the
 public key as a lock to which only you have the key (your private key).
@@ -11,3 +5,25 @@ You can send a copy of your lock to anyone without any problems, because
 only you can open it, as long as you keep your private key secure. To
 generate a public/private key pair, you can use the PuTTYgen key
 generator.
+Start PuTTYgen.exe it and follow these steps:
+1.  In "Parameters" (at the bottom of the window), choose "RSA" and set the number of
+    bits in the key to 4096.
+2.  Click on "Generate". To generate the key, you must move the mouse cursor over
+    the PuTTYgen window (this generates some random data that PuTTYgen
+    uses to generate the key pair). Once the key pair is generated, your
+    public key is shown in the field "Public key for pasting into OpenSSH authorized_keys file".
+3.  Next, it is advised to fill in the "Key comment" field to make it easier
+    identifiable afterwards.
+4.  Next, you should specify a passphrase in the "Key passphrase" field and retype it in
+    the "Confirm passphrase" field. Remember, the passphrase protects the private key against
+    unauthorised use, so it is best to choose one that is not too easy
+    to guess but that you can still remember. Using a passphrase is not
+    required, but we recommend you to use a good passphrase unless you
+    are certain that your computer's hard disk is encrypted with a
+    decent password. (If you are not sure your disk is encrypted, it
+    probably isn't.)
+5.  Save both the public and private keys in a folder on your personal
+    computer (We recommend to create and put them in the folder
+    "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh") with the
+    buttons "Save public key" and "Save private key". We recommend using the name "id_rsa.pub" for the public key, and
+    "id_rsa.ppk" for the private key.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json
index a8fcacd08a0..06b6e998c08 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json
@@ -4,10 +4,6 @@
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/account/../connecting/#open-a-terminal",
-        "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair"
-    },
     "previous_title": "account_windows_paragraph_4.2",
     "next_title": "account_windows_paragraph_4.4",
     "OS": "windows",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt
index b7743b0b9ae..d0425d6738f 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt
@@ -1,17 +1,2 @@
-Start PuTTYgen.exe it and follow these steps:
-1.  In "Parameters" (at the bottom of the window), choose "RSA" and set the number of
-    bits in the key to 4096.
-2.  Click on "Generate". To generate the key, you must move the mouse cursor over
-    the PuTTYgen window (this generates some random data that PuTTYgen
-    uses to generate the key pair). Once the key pair is generated, your
-    public key is shown in the field "Public key for pasting into OpenSSH authorized_keys file".
-3.  Next, it is advised to fill in the "Key comment" field to make it easier
-    identifiable afterwards.
-4.  Next, you should specify a passphrase in the "Key passphrase" field and retype it in
-    the "Confirm passphrase" field. Remember, the passphrase protects the private key against
-    unauthorised use, so it is best to choose one that is not too easy
-    to guess but that you can still remember. Using a passphrase is not
-    required, but we recommend you to use a good passphrase unless you
-    are certain that your computer's hard disk is encrypted with a
-    decent password. (If you are not sure your disk is encrypted, it
-    probably isn't.)
+If you use another program to generate a key pair, please remember that
+they need to be in the OpenSSH format to access the HPC clusters.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json
index 5fe2e81aa3d..fba810e7299 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json
@@ -5,7 +5,7 @@
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
     "previous_title": "account_windows_paragraph_4.3",
-    "next_title": "account_windows_paragraph_4.5",
+    "next_title": "account_paragraph_5",
     "OS": "windows",
     "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt
deleted file mode 100644
index 2326d87b6d7..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-5.  Save both the public and private keys in a folder on your personal
-    computer (We recommend to create and put them in the folder
-    "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh") with the
-    buttons "Save public key" and "Save private key". We recommend using the name "id_rsa.pub" for the public key, and
-    "id_rsa.ppk" for the private key.
-If you use another program to generate a key pair, please remember that
-they need to be in the OpenSSH format to access the HPC clusters.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json
deleted file mode 100644
index 79c584a8f41..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Generating-a-publicprivate-key-pair",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_windows_paragraph_4.4",
-    "next_title": "account_paragraph_5",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt
index b43909c15c4..a4f00ba7a5f 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt
@@ -9,3 +9,12 @@ To transfer your files using WinSCP,
 1.  Open the program
 2.  The "Login" menu is shown automatically (if it is closed, click "New Session" to open it again). Fill in the necessary fields under "Session"
     1.  Click "New Site".
+    2.  Enter "login.hpc.ugent.be" in the "Host name" field.
+    3.  Enter your "vsc-account" in the "User name" field.
+    4.  Select "SCP" as the "file" protocol.
+    5.  Note that the password field remains empty.
+    6.  Click "Advanced...".
+    7.  Click "SSH > Authentication".
+    8.  Select your private key in the field "Private key file".
+3.  Press the "Save" button, to save the session under "Session > Sites" for future access.
+4.  Finally, when clicking on "Login", you will be asked for your key passphrase.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt
index 642bb4e34b6..82c71ac4129 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt
@@ -1,12 +1,3 @@
-    2.  Enter "login.hpc.ugent.be" in the "Host name" field.
-    3.  Enter your "vsc-account" in the "User name" field.
-    4.  Select "SCP" as the "file" protocol.
-    5.  Note that the password field remains empty.
-    6.  Click "Advanced...".
-    7.  Click "SSH > Authentication".
-    8.  Select your private key in the field "Private key file".
-3.  Press the "Save" button, to save the session under "Session > Sites" for future access.
-4.  Finally, when clicking on "Login", you will be asked for your key passphrase.
 The first time you make a connection to the login node, a Security
 Alert will appear and you will be asked to verify the authenticity of the
 login node.
@@ -17,3 +8,4 @@ Make sure the fingerprint in the alert matches one of the following:
 - ssh-ed25519 256 SHA256:8AJg3lPN27y6i+um7rFx3xoy42U8ZgqNe4LsEycHILA
 - ssh-ecdsa 256 e6:d2:9c:d8:e7:59:45:03:4a:1f:dc:96:62:29:9c:5f
 - ssh-ecdsa 256 SHA256:C8TVx0w8UjGgCQfCmEUaOPxJGNMqv2PXLyBNODe5eOQ
+If it does, press Yes, if it doesn't, please contact hpc@ugent.be.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt
index b52c614f263..c0ffe6b4602 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt
@@ -1,4 +1,3 @@
-If it does, press Yes, if it doesn't, please contact hpc@ugent.be.
 Note: it is possible that the ssh-ed25519 fingerprint starts with ssh-ed25519 255
 rather than ssh-ed25519 256 (or vice versa), depending on the PuTTY version you are using.
 It is safe to ignore this 255 versus 256 difference, but the part after should be
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt
index 14f191fe61a..b5ecfb93e88 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt
@@ -1,11 +1,42 @@
 First Time connection to the HPC infrastructure
-ssh_exchange_identification: read: Connection reset by peer
-First Time connection to the HPC infrastructure
-The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH.
-If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal.
-If you have any issues connecting to the HPC after you've followed these
-steps, see Issues connecting to login node to troubleshoot.
 Open a Terminal
 You've generated a public/private key pair with PuTTYgen and have an
 approved account on the VSC clusters. The next step is to setup the
 connection to (one of) the HPC.
+In the screenshots, we show the setup for user
+to the HPC cluster via the login node "login.hpc.ugent.be".
+1.  Start the PuTTY executable putty.exe in your directory
+    C:\Program Files (x86)\PuTTY and the configuration screen will pop
+    up. As you will often use the PuTTY tool, we recommend adding a
+    shortcut on your desktop.
+2.  Within the category <Session\>, in the field <Host Name\>, enter the name of the
+    login node of the cluster (i.e., "login.hpc.ugent.be") you want to connect to.
+    
+3.  In the category "Connection > Data", in the field "Auto-login username", put in <vsc40000\> , which is your VSC
+    username that you have received by e-mail after your request was
+    approved.
+    
+4.  In the category "Connection > SSH > Auth", in the field "Private key file for authentication" click on "Browse" and select the private key
+    (i.e., "id_rsa.ppk") that you generated and saved above.
+5.  In the category "Connection > SSH > X11", click the "Enable X11 Forwarding" checkbox.
+6.  Now go back to <Session\>, and fill in "hpcugent" in the "Saved Sessions" field and press "Save" to
+    store the session information.
+    
+7.  Now pressing "Open", will open a terminal window and asks for you
+    passphrase.
+    
+8.  If this is your first time connecting, you will be asked to verify
+    the authenticity of the login node. Please see
+    section Warning message when first connecting to new host
+    on how to do this.
+9.  After entering your correct passphrase, you will be connected to the
+    login-node of the HPC.
+10. To check you can now "Print the Working Directory" (pwd) and check
+    the name of the computer, where you have logged in (hostname):
+        $ pwd
+    /user/home/gent/vsc400/vsc40000
+    $ hostname -f
+    gligar07.gastly.os
+    
+11. For future PuTTY sessions, just select your saved session (i.e. "hpcugent")
+    from the list, "Load" it and press "Open".
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json
index 24d4df9e248..ef4de8bd8e4 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json
@@ -3,13 +3,12 @@
     "subtitle": "Open-a-Terminal",
     "title_depth": 3,
     "directory": "connecting",
-    "parent_title": "Connection-restrictions",
+    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
     "links": {
-        "0": "https://docs.hpc.ugent.be/web_portal",
-        "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node"
+        "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host"
     },
     "previous_title": "connecting_paragraph_3",
-    "next_title": "connecting_windows_paragraph_4.2",
+    "next_title": "connecting_paragraph_5",
     "OS": "windows",
     "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt
deleted file mode 100644
index e481b47bc2b..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-In the screenshots, we show the setup for user
-to the HPC cluster via the login node "login.hpc.ugent.be".
-1.  Start the PuTTY executable putty.exe in your directory
-    C:\Program Files (x86)\PuTTY and the configuration screen will pop
-    up. As you will often use the PuTTY tool, we recommend adding a
-    shortcut on your desktop.
-2.  Within the category <Session\>, in the field <Host Name\>, enter the name of the
-    login node of the cluster (i.e., "login.hpc.ugent.be") you want to connect to.
-    
-3.  In the category "Connection > Data", in the field "Auto-login username", put in <vsc40000\> , which is your VSC
-    username that you have received by e-mail after your request was
-    approved.
-    
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json
deleted file mode 100644
index a783f797fdb..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Open-a-Terminal",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Connection-restrictions",
-    "previous_title": "connecting_windows_paragraph_4.1",
-    "next_title": "connecting_windows_paragraph_4.3",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt
deleted file mode 100644
index fbd5e76f278..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-4.  In the category "Connection > SSH > Auth", in the field "Private key file for authentication" click on "Browse" and select the private key
-    (i.e., "id_rsa.ppk") that you generated and saved above.
-5.  In the category "Connection > SSH > X11", click the "Enable X11 Forwarding" checkbox.
-6.  Now go back to <Session\>, and fill in "hpcugent" in the "Saved Sessions" field and press "Save" to
-    store the session information.
-    
-7.  Now pressing "Open", will open a terminal window and asks for you
-    passphrase.
-    
-8.  If this is your first time connecting, you will be asked to verify
-    the authenticity of the login node. Please see
-    section Warning message when first connecting to new host
-    on how to do this.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json
deleted file mode 100644
index 9da459060af..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json
+++ /dev/null
@@ -1,14 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Open-a-Terminal",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Connection-restrictions",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host"
-    },
-    "previous_title": "connecting_windows_paragraph_4.2",
-    "next_title": "connecting_windows_paragraph_4.4",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt
deleted file mode 100644
index f4a1302750b..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-9.  After entering your correct passphrase, you will be connected to the
-    login-node of the HPC.
-10. To check you can now "Print the Working Directory" (pwd) and check
-    the name of the computer, where you have logged in (hostname):
-        $ pwd
-    /user/home/gent/vsc400/vsc40000
-    $ hostname -f
-    gligar07.gastly.os
-    
-11. For future PuTTY sessions, just select your saved session (i.e. "hpcugent")
-    from the list, "Load" it and press "Open".
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json
deleted file mode 100644
index 83127a292f8..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Open-a-Terminal",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Connection-restrictions",
-    "previous_title": "connecting_windows_paragraph_4.3",
-    "next_title": "connecting_paragraph_5",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal"
-}
\ No newline at end of file

From 3407be3ea8b45de9d43e91fda8c4730ab0ae34e2 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 28 Aug 2024 13:43:49 +0200
Subject: [PATCH 131/152] adapted to the new source files

---
 .../chatbot_parser.py                         | 11 ++-----
 .../generic/account/account_paragraph_10.txt  |  1 -
 .../compiling_your_software_paragraph_1.txt   | 10 ++++++
 .../compiling_your_software_paragraph_10.txt  | 19 +++++++++++
 ...g_your_software_paragraph_10_metadata.json | 11 +++++++
 .../compiling_your_software_paragraph_11.txt  | 20 ++++++++++++
 ...g_your_software_paragraph_11_metadata.json | 11 +++++++
 .../compiling_your_software_paragraph_12.txt  |  9 ++++++
 ...g_your_software_paragraph_12_metadata.json | 11 +++++++
 ...ng_your_software_paragraph_1_metadata.json | 11 +++++++
 .../compiling_your_software_paragraph_2.txt   | 13 ++++++++
 ...ng_your_software_paragraph_2_metadata.json | 11 +++++++
 .../compiling_your_software_paragraph_3.txt   | 13 ++++++++
 ...ng_your_software_paragraph_3_metadata.json | 11 +++++++
 .../compiling_your_software_paragraph_4.txt   | 15 +++++++++
 ...ng_your_software_paragraph_4_metadata.json | 11 +++++++
 .../compiling_your_software_paragraph_5.txt   | 16 ++++++++++
 ...ng_your_software_paragraph_5_metadata.json | 11 +++++++
 .../compiling_your_software_paragraph_6.txt   | 30 +++++++++++++++++
 ...ng_your_software_paragraph_6_metadata.json | 14 ++++++++
 .../compiling_your_software_paragraph_7.txt   | 15 +++++++++
 ...ng_your_software_paragraph_7_metadata.json | 11 +++++++
 .../compiling_your_software_paragraph_8.txt   | 19 +++++++++++
 ...ng_your_software_paragraph_8_metadata.json | 11 +++++++
 .../compiling_your_software_paragraph_9.txt   | 32 +++++++++++++++++++
 ...ng_your_software_paragraph_9_metadata.json | 11 +++++++
 .../account/account_linux_paragraph_5.3.txt   |  2 +-
 .../account/account_linux_paragraph_5.4.txt   |  7 +---
 .../account/account_linux_paragraph_7.1.txt   |  2 +-
 .../account/account_linux_paragraph_7.2.txt   |  2 +-
 .../connecting_linux_paragraph_10.1.txt       | 10 +++---
 .../connecting_linux_paragraph_12.4.txt       |  2 +-
 .../connecting_linux_paragraph_12.5.txt       |  2 +-
 .../connecting_linux_paragraph_12.6.txt       |  2 +-
 .../connecting_linux_paragraph_5.1.txt        |  2 +-
 .../connecting_linux_paragraph_5.2.txt        |  2 +-
 .../account/account_macos_paragraph_5.3.txt   |  2 +-
 .../account/account_macos_paragraph_5.4.txt   |  7 +---
 .../account/account_macos_paragraph_7.1.txt   |  2 +-
 .../account/account_macos_paragraph_7.2.txt   |  2 +-
 .../connecting_macos_paragraph_10.1.txt       | 10 +++---
 .../connecting_macos_paragraph_12.4.txt       |  2 +-
 .../connecting_macos_paragraph_12.5.txt       |  2 +-
 .../connecting_macos_paragraph_12.6.txt       |  2 +-
 .../connecting_macos_paragraph_5.1.txt        |  2 +-
 .../connecting_macos_paragraph_5.2.txt        |  2 +-
 .../connecting_windows_paragraph_4.1.txt      |  3 +-
 47 files changed, 376 insertions(+), 51 deletions(-)
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 6ec1aa0e9b2..2b23fb4e962 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -195,10 +195,6 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
             elif re.fullmatch(r'!--.*?--', content):
                 curr_line = re.sub(r'<.*?>', "", curr_line)
 
-            # special case (ugly fix)
-            elif ' files</b' in content:
-                curr_line = re.sub(r'</b>', "", curr_line)
-
             # keep the rest
             else:
                 pass
@@ -224,7 +220,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
             for i, content in enumerate(asterisks):
                 curr_line = re.sub(r"(\*+)" + content[1] + r"\1", content[1], curr_line)
 
-        pluses = re.findall(r'\+\+(.+?)\+\+', curr_line)
+        pluses = list(set(re.findall(r'\+\+([^ ]+?)\+\+', curr_line) + re.findall(r'\+\+(".+?")\+\+', curr_line)))
         if pluses:
             for i, content in enumerate(pluses):
                 curr_line = re.sub(r"\+\+" + content + r"\+\+", content, curr_line)
@@ -437,6 +433,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1,
             # detect whether the current line is in a list
             if re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', line):  # beginning of a list entry
                 in_list = True
+                # print("List entry found")
             elif re.search(r'^\s{2,}.+$', line) and in_list:  # middle of a list entry
                 pass
             elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list:  # line(s) between list entries
@@ -444,9 +441,6 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1,
             else:
                 in_list = False
 
-            if in_list:
-                print(line[:-1])
-
             # only split up if current line is in a fully non-os-specific section
             if in_if_statement == 0:
 
@@ -969,6 +963,7 @@ def main(options, verbose=True):
 
     # for loops over all files
     for filename in filenames.keys():
+        print("Processing " + filename)
         ################### define/reset loop specific variables ###################
 
         # boolean indicating whether the current file is part of the linux tutorial
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt
index 371dd9db52b..f486b9b1348 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt
@@ -10,7 +10,6 @@ https://account.vscentrum.be/
 For further info please visit https://www.vscentrum.be/user-portal
 Kind regards,
 -- The VSC administrators
-
 Now, you can start using the HPC. You can always look up your VSC id later
 by visiting <https://account.vscentrum.be>.
 Adding multiple SSH public keys (optional)
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt
new file mode 100644
index 00000000000..db1afd43e68
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt
@@ -0,0 +1,10 @@
+Compiling and testing your software on the HPC
+All nodes in the HPC cluster are running the "RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty)" 
+Operating system, which is a specific version of Red Hat Enterprise Linux. This means that all the 
+software programs
+(executable) that the end-user wants to run on the HPC first must be
+compiled for RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty). It also means that you first have to install all the
+required external software packages on the HPC.
+Most commonly used compilers are already pre-installed on the HPC and can be
+used straight away. Also, many popular external software packages, which
+are regularly used in the scientific community, are also pre-installed.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt
new file mode 100644
index 00000000000..d49ba76b01a
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt
@@ -0,0 +1,19 @@
+The "mpihello.c" program is a simple source file, written in C with MPI
+library calls.
+Then, check the command line options for "mpicc" (GNU C-Compiler with
+MPI extensions), then we compile and list the contents of the directory
+again:
+mpicc --help
+mpicc -o mpihello mpihello.c
+ls -l
+A new file "mpihello" has been created. Note that this program has
+"execute" rights.
+Let's test this program on the "login" node first:
+$ ./mpihello
+Hello World from Node 0.
+It seems to work, now run it on the HPC.
+qsub mpihello.pbs
+Compiling a parallel program in Intel Parallel Studio Cluster Edition
+We will now compile the same program, but using the Intel Parallel
+Studio Cluster Edition compilers. We stay in the examples directory for
+this chapter:
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json
new file mode 100644
index 00000000000..ca0d7d80669
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "compiling_your_software",
+    "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition",
+    "title_depth": 3,
+    "directory": "compiling_your_software",
+    "parent_title": "",
+    "previous_title": "compiling_your_software_paragraph_9",
+    "next_title": "compiling_your_software_paragraph_11",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt
new file mode 100644
index 00000000000..be02d069ac7
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt
@@ -0,0 +1,20 @@
+cd ~/examples/Compiling-and-testing-your-software-on-the-HPC
+We will compile this C/MPI file into an executable with the Intel
+Parallel Studio Cluster Edition. First, clear the modules (purge) and
+then load the latest "intel" module:
+module purge
+module load intel
+Then, compile and list the contents of the directory again. The Intel
+equivalent of mpicc is mpiicc.
+mpiicc -o mpihello mpihello.c
+ls -l
+Note that the old "mpihello" file has been overwritten. Let's test this
+program on the "login" node first:
+$ ./mpihello
+Hello World from Node 0.
+It seems to work, now run it on the HPC.
+qsub mpihello.pbs
+Note: The AUGent only has a license for the Intel Parallel Studio Cluster
+Edition for a fixed number of users. As such, it might happen that you
+have to wait a few minutes before a floating license becomes available
+for your use.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json
new file mode 100644
index 00000000000..808331a3f9d
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "compiling_your_software",
+    "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition",
+    "title_depth": 3,
+    "directory": "compiling_your_software",
+    "parent_title": "",
+    "previous_title": "compiling_your_software_paragraph_10",
+    "next_title": "compiling_your_software_paragraph_12",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt
new file mode 100644
index 00000000000..1d37014a426
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt
@@ -0,0 +1,9 @@
+Note: The Intel Parallel Studio Cluster Edition contains equivalent
+compilers for all GNU compilers. Hereafter the overview for C, C++ and
+Fortran compilers.
+|             | Sequential Program |           | Parallel Program (with MPI) |           |
+|-------------|------------------------|-----------|---------------------------------|-----------|
+|             | GNU                | Intel | GNU                         | Intel |
+| C       | gcc                    | icc       | mpicc                           | mpiicc    |
+| C++     | g++                    | icpc      | mpicxx                          | mpiicpc   |
+| Fortran | gfortran               | ifort     | mpif90                          | mpiifort  |
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json
new file mode 100644
index 00000000000..d032428daf1
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "compiling_your_software",
+    "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition",
+    "title_depth": 3,
+    "directory": "compiling_your_software",
+    "parent_title": "",
+    "previous_title": "compiling_your_software_paragraph_11",
+    "next_title": null,
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json
new file mode 100644
index 00000000000..ec4b55c9a4d
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "compiling_your_software",
+    "subtitle": "Compiling-and-testing-your-software-on-the-HPC",
+    "title_depth": 1,
+    "directory": "compiling_your_software",
+    "parent_title": "",
+    "previous_title": null,
+    "next_title": "compiling_your_software_paragraph_2",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-testing-your-software-on-the-hpc"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt
new file mode 100644
index 00000000000..b52639b649d
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt
@@ -0,0 +1,13 @@
+Check the pre-installed software on the HPC
+In order to check all the available modules and their version numbers,
+which are pre-installed on the HPC enter:
+When your required application is not available on the HPC please contact
+any HPC member. Be aware of potential "License Costs". "Open Source"
+software is often preferred.
+Porting your code
+To port a software-program is to translate it from the operating system in
+which it was developed (e.g., Windows 7) to another operating system
+(e.g., Red Hat Enterprise Linux on our HPC) so that it can be used there. Porting implies some
+degree of effort, but not nearly as much as redeveloping the program in
+the new environment. It all depends on how "portable" you wrote your
+code.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json
new file mode 100644
index 00000000000..00750c81d97
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "compiling_your_software",
+    "subtitle": "Porting-your-code",
+    "title_depth": 2,
+    "directory": "compiling_your_software",
+    "parent_title": "",
+    "previous_title": "compiling_your_software_paragraph_1",
+    "next_title": "compiling_your_software_paragraph_3",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#porting-your-code"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt
new file mode 100644
index 00000000000..f994f0bc148
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt
@@ -0,0 +1,13 @@
+In the simplest case the file or files may simply be copied from one
+machine to the other. However, in many cases the software is installed
+on a computer in a way, which depends upon its detailed hardware,
+software, and setup, with device drivers for particular devices, using
+installed operating system and supporting software components, and using
+different directories.
+In some cases software, usually described as "portable software" is
+specifically designed to run on different computers with compatible
+operating systems and processors without any machine-dependent
+installation; it is sufficient to transfer specified directories and
+their contents. Hardware- and software-specific information is often
+stored in configuration files in specified locations (e.g., the registry
+on machines running MS Windows).
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json
new file mode 100644
index 00000000000..90e7d236beb
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "compiling_your_software",
+    "subtitle": "Porting-your-code",
+    "title_depth": 2,
+    "directory": "compiling_your_software",
+    "parent_title": "",
+    "previous_title": "compiling_your_software_paragraph_2",
+    "next_title": "compiling_your_software_paragraph_4",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#porting-your-code"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt
new file mode 100644
index 00000000000..f7bf4172b71
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt
@@ -0,0 +1,15 @@
+Software, which is not portable in this sense, will have to be
+transferred with modifications to support the environment on the
+destination machine.
+Whilst programming, it would be wise to stick to certain standards
+(e.g., ISO/ANSI/POSIX). This will ease the porting of your code to other
+platforms.
+Porting your code to the RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty) platform is the responsibility of the end-user.
+Compiling and building on the HPC
+Compiling refers to the process of translating code written in some
+programming language, e.g., Fortran, C, or C++, to machine code.
+Building is similar, but includes gluing together the machine code
+resulting from different source files into an executable (or library).
+The text below guides you through some basic problems typical for small
+software projects. For larger projects it is more appropriate to use
+makefiles or even an advanced build system like CMake.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json
new file mode 100644
index 00000000000..b7c9ef0f71b
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "compiling_your_software",
+    "subtitle": "Compiling-and-building-on-the-HPC",
+    "title_depth": 2,
+    "directory": "compiling_your_software",
+    "parent_title": "",
+    "previous_title": "compiling_your_software_paragraph_3",
+    "next_title": "compiling_your_software_paragraph_5",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt
new file mode 100644
index 00000000000..342262b9264
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt
@@ -0,0 +1,16 @@
+All the HPC nodes run the same version of the Operating System, i.e. RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty). So,
+it is sufficient to compile your program on any compute node. Once you
+have generated an executable with your compiler, this executable should
+be able to run on any other compute-node.
+A typical process looks like:
+1.  Copy your software to the login-node of the HPC
+2.  Start an interactive session on a compute node;
+3.  Compile it;
+4.  Test it locally;
+5.  Generate your job scripts;
+6.  Test it on the HPC
+7.  Run it (in parallel);
+We assume you've copied your software to the HPC. The next step is to request
+your private compute node.
+$ qsub -I
+qsub: waiting for job 123456 to start
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json
new file mode 100644
index 00000000000..02a8fad0ae2
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "compiling_your_software",
+    "subtitle": "Compiling-and-building-on-the-HPC",
+    "title_depth": 2,
+    "directory": "compiling_your_software",
+    "parent_title": "",
+    "previous_title": "compiling_your_software_paragraph_4",
+    "next_title": "compiling_your_software_paragraph_6",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt
new file mode 100644
index 00000000000..7ebde664878
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt
@@ -0,0 +1,30 @@
+Compiling a sequential program in C
+Go to the examples for chapter 
+Compiling and testing your software on the HPC and load the 
+foss module:
+cd ~/examples/Compiling-and-testing-your-software-on-the-HPC
+module load foss
+We now list the directory and explore the contents of the "hello.c"
+program:
+$ ls -l
+total 512
+-rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c
+-rw-r--r-- 1 vsc40000 130 Sep 16 11:39 hello.pbs*
+-rw-r--r-- 1 vsc40000 359 Sep 16 13:55 mpihello.c
+-rw-r--r-- 1 vsc40000 304 Sep 16 13:55 mpihello.pbs
+/*
+ * VSC        : Flemish Supercomputing Centre
+ * Tutorial   : Introduction to HPC
+ * Description: Print 500 numbers, whilst waiting 1 second in between
+ */
+#include "stdio.h"
+int main( int argc, char *argv[] )
+{
+  int i;
+  for (i=0; i<500; i++)
+  {
+    printf("Hello #%d\n", i);
+    fflush(stdout);
+    sleep(1);
+  }
+}
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json
new file mode 100644
index 00000000000..16942249583
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json
@@ -0,0 +1,14 @@
+{
+    "main_title": "compiling_your_software",
+    "subtitle": "Compiling-a-sequential-program-in-C",
+    "title_depth": 3,
+    "directory": "compiling_your_software",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc"
+    },
+    "parent_title": "",
+    "previous_title": "compiling_your_software_paragraph_5",
+    "next_title": "compiling_your_software_paragraph_7",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt
new file mode 100644
index 00000000000..1d58d0d6ae4
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt
@@ -0,0 +1,15 @@
+The "hello.c" program is a simple source file, written in C. It'll print
+500 times "Hello #<num>", and waits one second between 2 printouts.
+We first need to compile this C-file into an executable with the
+gcc-compiler.
+First, check the command line options for *"gcc" (GNU C-Compiler)*, then
+we compile. The -O2 option enables a moderate level of optimization when compiling the code. 
+It instructs the compiler to optimize the code for better performance without significantly increasing compilation time.
+Finally, list the contents of the directory again:
+$ gcc -help
+$ gcc -O2 -o hello hello.c
+$ ls -l
+total 512
+-rwxrwxr-x 1 vsc40000 7116 Sep 16 11:43 hello*
+-rw-r--r-- 1 vsc40000  214 Sep 16 09:42 hello.c
+-rwxr-xr-x 1 vsc40000  130 Sep 16 11:39 hello.pbs*
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json
new file mode 100644
index 00000000000..e5f3161c3f2
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "compiling_your_software",
+    "subtitle": "Compiling-a-sequential-program-in-C",
+    "title_depth": 3,
+    "directory": "compiling_your_software",
+    "parent_title": "",
+    "previous_title": "compiling_your_software_paragraph_6",
+    "next_title": "compiling_your_software_paragraph_8",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt
new file mode 100644
index 00000000000..5ca5de1e6d4
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt
@@ -0,0 +1,19 @@
+A new file "hello" has been created. Note that this file has "execute"
+rights, i.e., it is an executable. More often than not, calling gcc --
+or any other compiler for that matter -- will provide you with a list of
+errors and warnings referring to mistakes the programmer made, such as
+typos, syntax errors. You will have to correct them first in order to
+make the code compile. Warnings pinpoint less crucial issues that may
+relate to performance problems, using unsafe or obsolete language
+features, etc. It is good practice to remove all warnings from a
+compilation process, even if they seem unimportant so that a code change
+that produces a warning does not go unnoticed.
+Let's test this program on the local compute node, which is at your
+disposal after the qsub -I command:
+$ ./hello
+Hello #0
+Hello #1
+Hello #2
+Hello #3
+Hello #4
+...
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json
new file mode 100644
index 00000000000..942949951d1
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "compiling_your_software",
+    "subtitle": "Compiling-a-sequential-program-in-C",
+    "title_depth": 3,
+    "directory": "compiling_your_software",
+    "parent_title": "",
+    "previous_title": "compiling_your_software_paragraph_7",
+    "next_title": "compiling_your_software_paragraph_9",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt
new file mode 100644
index 00000000000..28982d2bd95
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt
@@ -0,0 +1,32 @@
+It seems to work, now run it on the HPC
+qsub hello.pbs
+Compiling a parallel program in C/MPI
+cd ~/examples/Compiling-and-testing-your-software-on-the-HPC
+List the directory and explore the contents of the "mpihello.c"
+program:
+$ ls -l
+total 512
+total 512
+-rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c
+-rw-r--r-- 1 vsc40000 130 Sep 16 11:39 hello.pbs*
+-rw-r--r-- 1 vsc40000 359 Sep 16 13:55 mpihello.c
+-rw-r--r-- 1 vsc40000 304 Sep 16 13:55 mpihello.pbs
+/*
+ * VSC        : Flemish Supercomputing Centre
+ * Tutorial   : Introduction to HPC
+ * Description: Example program, to compile with MPI
+ */
+#include <stdio.h>
+#include <mpi.h>
+main(int argc, char **argv)
+{
+  int node, i, j;
+  float f;
+  MPI_Init(&argc,&argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &node);
+   
+  printf("Hello World from Node %d.\n", node);
+  for (i=0; i<=100000; i++)
+    f=i*2.718281828*i+i+i*3.141592654;
+  MPI_Finalize();
+}
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json
new file mode 100644
index 00000000000..fe51e423a96
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json
@@ -0,0 +1,11 @@
+{
+    "main_title": "compiling_your_software",
+    "subtitle": "Compiling-a-parallel-program-in-CMPI",
+    "title_depth": 3,
+    "directory": "compiling_your_software",
+    "parent_title": "",
+    "previous_title": "compiling_your_software_paragraph_8",
+    "next_title": "compiling_your_software_paragraph_10",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-cmpi"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt
index 9d84f459724..5df90a3dd7c 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt
@@ -2,7 +2,7 @@ Generate a public/private key pair with OpenSSH
 A key pair might already be present in the default location inside your
 home directory. Therefore, we first check if a key is available with the
 "list short" ("ls") command:
-$ ls ~/.ssh
+ls ~/.ssh
 If a key-pair is already available, you would normally get:
 authorized_keys     id_rsa      id_rsa.pub      known_hosts
 Otherwise, the command will show:
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt
index 3cde4395d81..d29d61d27d9 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt
@@ -10,9 +10,4 @@ the passphrase. Be sure to never give away your private key, it is
 private and should stay private. You should not even copy it to one of
 your other machines, instead, you should create a new public/private key
 pair for each machine.
-$ ssh-keygen -t rsa -b 4096
-Generating public/private rsa key pair. Enter file in which to save the
-key (/home/user/.ssh/id_rsa): Enter passphrase (empty for no
-passphrase): Enter same passphrase again: Your identification has been
-saved in /home/user/.ssh/id_rsa. Your public key has been saved in
-/home/user/.ssh/id_rsa.pub.
+ssh-keygen -t rsa -b 4096
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt
index e3ef2176f09..8e8429c1642 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt
@@ -7,7 +7,7 @@ Agent admitted failure to sign using the key.
 Permission denied (publickey,gssapi-keyex,gssapi-with-mic).
 This could be fixed using the ssh-add command. You can include the new
 private keys' identities in your keyring with:
-$ ssh-add
+ssh-add
  tip
     Without extra options ssh-add adds any key located at $HOME/.ssh
     directory, but you can specify the private key location path as
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt
index 93019fa1a6a..c227dbbb6e2 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt
@@ -1,5 +1,5 @@
 Check that your key is available from the keyring with:
-$ ssh-add -l
+ssh-add -l
 After these changes the key agent will keep your SSH key to connect to
 the clusters as usual.
  tip
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt
index 3e588c709d4..4c8894438c9 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt
@@ -14,7 +14,6 @@ First Time connection to the HPC infrastructure
     export LANG="en_US.UTF-8"
     ...
     
-    
      tip "tip: vi"
         To start entering text in vi: move to the place you want to start
         entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq"
@@ -24,11 +23,10 @@ First Time connection to the HPC infrastructure
     or alternatively (if you are not comfortable with the Linux editors),
     again on your local machine:
     
-    $ echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile
-    $ echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile
-    $ echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile
-    $ echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile
-    
+    echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile
+    echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile
+    echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile
+    echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile
     
     You can now log out, open a new terminal/shell on your local machine and
     reconnect to the login node, and you should not get these warnings anymore.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt
index 2664953ed0c..d09b69552ef 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt
@@ -11,4 +11,4 @@ It's also possible to copy entire directories (and their contents) with
 the -r flag. For example, if we want to copy the local directory
 dataset to $VSC_SCRATCH, we can use the following command (assuming
 you've created the scratch symlink):
-$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch
+scp -r dataset vsc40000@login.hpc.ugent.be:scratch
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt
index 51d39b548c3..532d57bb4a5 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt
@@ -1,6 +1,6 @@
 If you don't use the -r option to copy a directory, you will run into
 the following error:
-$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch
+$ scp dataset vsc40000@login.hpc.ugent.be:scratch
 dataset: not a regular file
 Using sftp
 The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt
index 4ae257101f1..1ef13b80c6f 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt
@@ -1,5 +1,5 @@
 One easy way of starting a sftp session is
-$ sftp vsc40000@login.hpc.ugent.be
+sftp vsc40000@login.hpc.ugent.be
 Typical and popular commands inside an sftp session are:
 |                       |                                                                                      |
 |:--------------------------|:-------------------------------------------------------------------------------------|
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt
index 94d5d9500a3..27ae3fb7bd4 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt
@@ -1,7 +1,7 @@
 First Time connection to the HPC infrastructure
 Connect
 Open up a terminal and enter the following command to connect to the HPC.
-$ ssh vsc40000@login.hpc.ugent.be
+ssh vsc40000@login.hpc.ugent.be
 Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login
 node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command.
 The first time you make a connection to the login node, you will be
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt
index 312fe885cb0..be01e09bba0 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt
@@ -1,4 +1,4 @@
 Permission denied (publickey,gssapi-keyex,gssapi-with-mic).
 In this case, use the -i option for the ssh command to specify the
 location of your private key. For example:
-$ ssh -i /home/example/my_keys
+ssh -i /home/example/my_keys
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt
index 2c97d597425..a8c087f818b 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt
@@ -6,7 +6,7 @@ Generate a public/private key pair with OpenSSH
 A key pair might already be present in the default location inside your
 home directory. Therefore, we first check if a key is available with the
 "list short" ("ls") command:
-$ ls ~/.ssh
+ls ~/.ssh
 If a key-pair is already available, you would normally get:
 authorized_keys     id_rsa      id_rsa.pub      known_hosts
 Otherwise, the command will show:
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt
index 3cde4395d81..d29d61d27d9 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt
@@ -10,9 +10,4 @@ the passphrase. Be sure to never give away your private key, it is
 private and should stay private. You should not even copy it to one of
 your other machines, instead, you should create a new public/private key
 pair for each machine.
-$ ssh-keygen -t rsa -b 4096
-Generating public/private rsa key pair. Enter file in which to save the
-key (/home/user/.ssh/id_rsa): Enter passphrase (empty for no
-passphrase): Enter same passphrase again: Your identification has been
-saved in /home/user/.ssh/id_rsa. Your public key has been saved in
-/home/user/.ssh/id_rsa.pub.
+ssh-keygen -t rsa -b 4096
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt
index d204f4e4392..1069ebd9fbd 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt
@@ -7,7 +7,7 @@ Agent admitted failure to sign using the key.
 Permission denied (publickey,gssapi-keyex,gssapi-with-mic).
 This could be fixed using the ssh-add command. You can include the new
 private keys' identities in your keyring with:
-$ ssh-add
+ssh-add
  tip
     Without extra options ssh-add adds any key located at $HOME/.ssh
     directory, but you can specify the private key location path as
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt
index 8fd93f6b4f6..c880ee4a228 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt
@@ -1,5 +1,5 @@
 Check that your key is available from the keyring with:
-$ ssh-add -l
+ssh-add -l
 After these changes the key agent will keep your SSH key to connect to
 the clusters as usual.
  tip
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt
index 3e588c709d4..4c8894438c9 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt
@@ -14,7 +14,6 @@ First Time connection to the HPC infrastructure
     export LANG="en_US.UTF-8"
     ...
     
-    
      tip "tip: vi"
         To start entering text in vi: move to the place you want to start
         entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq"
@@ -24,11 +23,10 @@ First Time connection to the HPC infrastructure
     or alternatively (if you are not comfortable with the Linux editors),
     again on your local machine:
     
-    $ echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile
-    $ echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile
-    $ echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile
-    $ echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile
-    
+    echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile
+    echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile
+    echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile
+    echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile
     
     You can now log out, open a new terminal/shell on your local machine and
     reconnect to the login node, and you should not get these warnings anymore.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt
index 2664953ed0c..d09b69552ef 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt
@@ -11,4 +11,4 @@ It's also possible to copy entire directories (and their contents) with
 the -r flag. For example, if we want to copy the local directory
 dataset to $VSC_SCRATCH, we can use the following command (assuming
 you've created the scratch symlink):
-$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch
+scp -r dataset vsc40000@login.hpc.ugent.be:scratch
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt
index 51d39b548c3..532d57bb4a5 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt
@@ -1,6 +1,6 @@
 If you don't use the -r option to copy a directory, you will run into
 the following error:
-$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch
+$ scp dataset vsc40000@login.hpc.ugent.be:scratch
 dataset: not a regular file
 Using sftp
 The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt
index 4ae257101f1..1ef13b80c6f 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt
@@ -1,5 +1,5 @@
 One easy way of starting a sftp session is
-$ sftp vsc40000@login.hpc.ugent.be
+sftp vsc40000@login.hpc.ugent.be
 Typical and popular commands inside an sftp session are:
 |                       |                                                                                      |
 |:--------------------------|:-------------------------------------------------------------------------------------|
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt
index d4c89b7e1c7..1e22cfc8b1f 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt
@@ -2,7 +2,7 @@ First Time connection to the HPC infrastructure
 Connect
 Open up a terminal and enter the following command to connect to the HPC.
 You can open a terminal by navigation to Applications and then Utilities in the finder and open Terminal.app, or enter Terminal in Spotlight Search.
-$ ssh vsc40000@login.hpc.ugent.be
+ssh vsc40000@login.hpc.ugent.be
 Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login
 node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command.
 The first time you make a connection to the login node, you will be
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt
index 6fa418464dd..f3f5ac6e775 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt
@@ -4,4 +4,4 @@ private key somewhere else than the default location
 Permission denied (publickey,gssapi-keyex,gssapi-with-mic).
 In this case, use the -i option for the ssh command to specify the
 location of your private key. For example:
-$ ssh -i /home/example/my_keys
+ssh -i /home/example/my_keys
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt
index b5ecfb93e88..69db57957dc 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt
@@ -33,10 +33,9 @@ to the HPC cluster via the login node "login.hpc.ugent.be".
     login-node of the HPC.
 10. To check you can now "Print the Working Directory" (pwd) and check
     the name of the computer, where you have logged in (hostname):
-        $ pwd
+    $ pwd
     /user/home/gent/vsc400/vsc40000
     $ hostname -f
     gligar07.gastly.os
-    
 11. For future PuTTY sessions, just select your saved session (i.e. "hpcugent")
     from the list, "Load" it and press "Open".

From 6d04bbc7656406eaa1d00e0386ecfea76848b5e7 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 28 Aug 2024 15:45:41 +0200
Subject: [PATCH 132/152] add source-directory to metadata and verbose mode

---
 .../chatbot_parser.py                         | 34 +++++++++++--------
 .../tps1/tps1_paragraph_1_metadata.json       |  1 +
 .../tps1/tps1_paragraph_3_metadata.json       |  1 +
 .../tps1_linux_paragraph_2.1_metadata.json    |  1 +
 .../tps1_linux_paragraph_2.2_metadata.json    |  1 +
 .../tps1_macos_paragraph_2.1_metadata.json    |  1 +
 .../tps1_macos_paragraph_2.2_metadata.json    |  1 +
 .../tps1_windows_paragraph_2.1_metadata.json  |  1 +
 .../tps1_windows_paragraph_2.2_metadata.json  |  1 +
 .../Subtitle-1/Subtitle-1_metadata.json       |  1 +
 .../Subtitle-5-g/Subtitle-5-g_metadata.json   |  1 +
 .../Subtitle-2-g/Subtitle-2-g_metadata.json   |  1 +
 .../Subtitle-4-l&m_metadata.json              |  1 +
 .../Subtitle-2-g/Subtitle-2-g_metadata.json   |  1 +
 .../Subtitle-4-l&m_metadata.json              |  1 +
 .../Subtitle-2-g/Subtitle-2-g_metadata.json   |  1 +
 .../Subtitle-3-w/Subtitle-3-w_metadata.json   |  1 +
 .../tests/test_full_script.py                 |  8 +++--
 .../tests/test_write_metadata.py              | 12 +++----
 19 files changed, 47 insertions(+), 23 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 2b23fb4e962..e4ed0009654 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -21,6 +21,7 @@
 INCLUDE_LINKS_IN_PLAINTEXT = "INCLUDE_LINKS_IN_PLAINTEXT"
 SPLIT_ON_PARAGRAPHS = "SPLIT_ON_PARAGRAPHS"
 DEEP_DIRECTORIES = "DEEP_DIRECTORIES"
+VERBOSE = "VERBOSE"
 
 # directories
 PARSED_MDS = "parsed_mds"
@@ -67,6 +68,7 @@
 WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE = "write_text_and_check_extra_message"
 
 # Metadata attributes
+SOURCE_FILE = "source_file"
 MAIN_TITLE = "main_title"
 SUBTITLE = "subtitle"
 TITLE_DEPTH = "title_depth"
@@ -207,7 +209,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
     if '???' in curr_line:
         curr_line = re.sub(r'\?\?\?', "", curr_line)
 
-    # get rid of other markdown indicators (`, *, +, _)
+    # get rid of other indicators (`, *, +, _)
     if not in_code_block:
 
         backquotes = re.findall(r'`(.*?)`', curr_line)
@@ -320,7 +322,7 @@ def split_on_titles(file, main_title, options):
                             paragraphs_os_free_text[title] = current_paragraph
 
                         # write metadata of previous file
-                        paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir)
+                        paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir, options[SOURCE_DIRECTORY] + '/' + main_title + '.md')
 
                     # make a new title
                     title = make_valid_title(line[title_level + 1:-1])
@@ -357,7 +359,7 @@ def split_on_titles(file, main_title, options):
         paragraphs_os_text[title] = current_paragraph
     else:
         paragraphs_os_free_text[title] = current_paragraph
-    paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, curr_dirs[last_title_level])
+    paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, curr_dirs[last_title_level], options[SOURCE_DIRECTORY] + '/' + main_title + '.md')
 
     return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order
 
@@ -407,7 +409,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1,
     # metadata title
     metadata_title = main_title
 
-    # TODO: define metadata data if split occurs on paragraphs and last_title and title_level are known (placeholder in place right now)
+    # define metadata data if split occurs on paragraphs and last_title and title_level are known (will be replaced later on in the process)
     if current_paragraph_number != -1:
         last_title_level = 4
         last_dir = "PLACEHOLDER"
@@ -467,7 +469,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1,
                         paragraphs_os_free_text[paragraph_title] = current_paragraph
 
                     # write metadata of previous file
-                    paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir)
+                    paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir, source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md')
                     subtitle_order.append(paragraph_title)
 
                     # reset the current paragraph
@@ -512,13 +514,13 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1,
         paragraphs_os_text[paragraph_title] = current_paragraph
     else:
         paragraphs_os_free_text[paragraph_title] = current_paragraph
-    paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level])
+    paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level], source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md')
     subtitle_order.append(paragraph_title)
 
     return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order
 
 
-def write_metadata(main_title, subtitle, links, title_level, directory):
+def write_metadata(main_title, subtitle, links, title_level, directory, source_file):
     """
     Function that writes metadata about a text section to a dictionary
 
@@ -527,10 +529,11 @@ def write_metadata(main_title, subtitle, links, title_level, directory):
     :param links: a list of links contained within the section
     :param title_level: the depth of the title of the section
     :param directory: the directory where the section will eventually be written (can either be generic or os-specific)
+    :param source_file: the source file that the section originates from
     :return paragraph_metadata: dictionary containing the metadata about the section
     """
 
-    paragraph_metadata = {MAIN_TITLE: main_title, SUBTITLE: subtitle, TITLE_DEPTH: title_level, DIRECTORY: directory}
+    paragraph_metadata = {MAIN_TITLE: main_title, SUBTITLE: subtitle, SOURCE_FILE: source_file, TITLE_DEPTH: title_level, DIRECTORY: directory}
 
     if len(links) > 0:
         paragraph_metadata[LINKS] = {}
@@ -918,7 +921,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
             pass
 
 
-def main(options, verbose=True):
+def main(options):
     """
     main function
 
@@ -931,11 +934,10 @@ def main(options, verbose=True):
                     MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text,
                     INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext,
                     DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not}
-    :param verbose: boolean indicating whether print statements from the main function should be print, only used when for testing
     :return:
     """
 
-    if options[DEEP_DIRECTORIES] and verbose:
+    if options[DEEP_DIRECTORIES] and options[VERBOSE]:
         print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
 
     # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason
@@ -963,7 +965,6 @@ def main(options, verbose=True):
 
     # for loops over all files
     for filename in filenames.keys():
-        print("Processing " + filename)
         ################### define/reset loop specific variables ###################
 
         # boolean indicating whether the current file is part of the linux tutorial
@@ -987,6 +988,9 @@ def main(options, verbose=True):
 
         ################### actually parse the md file ###################
 
+        if options[VERBOSE]:
+            print("Processing " + filename)
+
         # create directories for the source markdown file
         for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]:
             os.makedirs(directory, exist_ok=True)
@@ -1015,7 +1019,7 @@ def main(options, verbose=True):
     if os.path.exists(TEMP_JINJA_FILE):
         os.remove(TEMP_JINJA_FILE)
 
-    if verbose:
+    if options[VERBOSE]:
         print("Parsing finished successfully")
 
 
@@ -1031,6 +1035,7 @@ def main(options, verbose=True):
     parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)")
     parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts")
     parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is enabled")
+    parser.add_argument("-v", "--verbose", action="store_true", help="Run the script with verbose output")
 
     args = parser.parse_args()
 
@@ -1041,6 +1046,7 @@ def main(options, verbose=True):
                     MIN_PARAGRAPH_LENGTH: args.min_paragraph_length,
                     MAX_TITLE_DEPTH: args.max_title_depth,
                     INCLUDE_LINKS_IN_PLAINTEXT: args.links,
-                    DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles}
+                    DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles,
+                    VERBOSE: args.verbose}
 
     main(options_dict)
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json
index 19e44fad91d..08c0b4e4973 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "tps1",
     "subtitle": "Main-title",
+    "source_file": "tests/test_files/ftps/tps1.md",
     "title_depth": 1,
     "directory": "tps1",
     "links": {
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json
index b4c98ff6465..2f1ea4dcd1f 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "tps1",
     "subtitle": "Conclusion",
+    "source_file": "tests/test_files/ftps/tps1.md",
     "title_depth": 2,
     "directory": "tps1",
     "parent_title": "",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json
index bac81ed87e3..208cb3472f4 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "tps1",
     "subtitle": "OS-specific-sections",
+    "source_file": "tests/test_files/ftps/tps1.md",
     "title_depth": 2,
     "directory": "tps1",
     "parent_title": "Main-title",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json
index 522265436ab..b975dfe4e03 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "tps1",
     "subtitle": "Non-Windows-section",
+    "source_file": "tests/test_files/ftps/tps1.md",
     "title_depth": 3,
     "directory": "tps1",
     "parent_title": "OS-specific-sections",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json
index 5d9ec163f99..9c605eb9004 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "tps1",
     "subtitle": "OS-specific-sections",
+    "source_file": "tests/test_files/ftps/tps1.md",
     "title_depth": 2,
     "directory": "tps1",
     "parent_title": "Main-title",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json
index 7b06f06efdd..e3ca81d7cc5 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "tps1",
     "subtitle": "Non-Windows-section",
+    "source_file": "tests/test_files/ftps/tps1.md",
     "title_depth": 3,
     "directory": "tps1",
     "parent_title": "OS-specific-sections",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json
index e8e50aa6c32..ab58c622b8c 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "tps1",
     "subtitle": "OS-specific-sections",
+    "source_file": "tests/test_files/ftps/tps1.md",
     "title_depth": 2,
     "directory": "tps1",
     "parent_title": "Main-title",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json
index 84ea6ad53f9..435c9e9c484 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "tps1",
     "subtitle": "Windows-specific-section",
+    "source_file": "tests/test_files/ftps/tps1.md",
     "title_depth": 3,
     "directory": "tps1",
     "parent_title": "OS-specific-sections",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json
index 9fdbce652bf..b7786c066a7 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "tts1",
     "subtitle": "Subtitle-1",
+    "source_file": "tests/test_files/ftts/tts1.md",
     "title_depth": 2,
     "directory": "tts1\\Main-title\\Subtitle-1",
     "parent_title": "Main-title",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json
index b48bcaaa08c..eb5403804e2 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "tts1",
     "subtitle": "Subtitle-5-g",
+    "source_file": "tests/test_files/ftts/tts1.md",
     "title_depth": 2,
     "directory": "tts1\\Main-title\\Subtitle-5-g",
     "parent_title": "Main-title",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
index a2b68c8865e..f7330bec86d 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "tts1",
     "subtitle": "Subtitle-2-g",
+    "source_file": "tests/test_files/ftts/tts1.md",
     "title_depth": 2,
     "directory": "tts1\\Main-title\\Subtitle-2-g",
     "parent_title": "Main-title",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
index 537541e2cb0..a76f852c874 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "tts1",
     "subtitle": "Subtitle-4-l&m",
+    "source_file": "tests/test_files/ftts/tts1.md",
     "title_depth": 3,
     "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m",
     "parent_title": "Subtitle-2-g",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
index 6846da26b72..8b234c92fa6 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "tts1",
     "subtitle": "Subtitle-2-g",
+    "source_file": "tests/test_files/ftts/tts1.md",
     "title_depth": 2,
     "directory": "tts1\\Main-title\\Subtitle-2-g",
     "parent_title": "Main-title",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
index 4e167b116d2..732d309da81 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "tts1",
     "subtitle": "Subtitle-4-l&m",
+    "source_file": "tests/test_files/ftts/tts1.md",
     "title_depth": 3,
     "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m",
     "parent_title": "Subtitle-2-g",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
index c4620a94080..7a43426a85f 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "tts1",
     "subtitle": "Subtitle-2-g",
+    "source_file": "tests/test_files/ftts/tts1.md",
     "title_depth": 2,
     "directory": "tts1\\Main-title\\Subtitle-2-g",
     "parent_title": "Main-title",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json
index aa4b6317ce6..4d7f494320d 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "tts1",
     "subtitle": "Subtitle-3-w",
+    "source_file": "tests/test_files/ftts/tts1.md",
     "title_depth": 3,
     "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-3-w",
     "parent_title": "Subtitle-2-g",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py
index 61a6f3f1bdf..91605dec651 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py
@@ -14,7 +14,8 @@
       "MIN_PARAGRAPH_LENGTH": 160,
       "MAX_TITLE_DEPTH": 4,
       "INCLUDE_LINKS_IN_PLAINTEXT": False,
-      "DEEP_DIRECTORIES": False}
+      "DEEP_DIRECTORIES": False,
+      "VERBOSE": False}
      ),
     ("tests/test_files/ftts", "tests/test_files/ftts/actual",
      "tests/test_files/ftts/output",
@@ -25,12 +26,13 @@
       "MIN_PARAGRAPH_LENGTH": 160,
       "MAX_TITLE_DEPTH": 4,
       "INCLUDE_LINKS_IN_PLAINTEXT": False,
-      "DEEP_DIRECTORIES": True}
+      "DEEP_DIRECTORIES": True,
+      "VERBOSE": False}
      )
 ])
 def test_full_script_generated_directories(input_directory, actual_output_directory, expected_output_directory, options):
     # run the script
-    main(options, verbose=False)
+    main(options)
 
     # Compare directories and files
     for dirpath, dirnames, filenames in os.walk(expected_output_directory):
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py
index 68f1772cb24..6c30fef7985 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py
@@ -3,13 +3,13 @@
 from chatbot_parser import write_metadata
 
 
-@pytest.mark.parametrize("main_title,subtitle,links,title_level,directory,output", [
-    ("", "", [], 1, "", {"main_title": "", "subtitle": "", "title_depth": 1, "directory": "", "parent_title": ""}),
+@pytest.mark.parametrize("main_title,subtitle,links,title_level,directory,source_file,output", [
+    ("", "", [], 1, "", "", {"source_file": "", "main_title": "", "subtitle": "", "title_depth": 1, "directory": "", "parent_title": ""}),
     ("A_very_good_main_title", "An_extremely_good_subtitle", ["the_first.link", "the_second.link"], 2,
-     os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"),
-     {"main_title": "A_very_good_main_title", "subtitle": "An_extremely_good_subtitle", "title_depth": 2,
+     os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), "source",
+     {"source_file": "source", "main_title": "A_very_good_main_title", "subtitle": "An_extremely_good_subtitle", "title_depth": 2,
       "directory": os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"),
       "parent_title": "An_awesome_parent_file", "links": {"0": "the_first.link", "1": "the_second.link"}})
 ])
-def test_write_metadata(main_title, subtitle, links, title_level, directory, output):
-    assert write_metadata(main_title, subtitle, links, title_level, directory) == output
+def test_write_metadata(main_title, subtitle, links, title_level, directory, source_file, output):
+    assert write_metadata(main_title, subtitle, links, title_level, directory, source_file) == output

From f33cfb3b22feacf540944dc8812d5a55c59763d4 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Wed, 28 Aug 2024 16:57:24 +0200
Subject: [PATCH 133/152] added verbose mode

---
 .../chatbot_parser.py                         | 124 +++++++++++++++---
 1 file changed, 107 insertions(+), 17 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index e4ed0009654..60776fcb379 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -106,6 +106,9 @@
 # Marker for comments for the bot
 INPUT_FOR_BOT = "INPUT_FOR_BOT"
 
+# Standard strings for verbose output
+LINE = "------------------------------------------------------------------------------------------------------\n"
+
 
 ################### define functions ###################
 
@@ -266,6 +269,10 @@ def split_on_titles(file, main_title, options):
     :return paragraphs_metadata: dictionary containing the metadata of each split section of text
     :return subtitle_order: list containing all encountered subtitles in order of appearance
     """
+
+    if options[VERBOSE]:
+        print("Splitting on titles\n")
+
     # start of assuming we haven't encountered a title
     after_first_title = False
 
@@ -302,15 +309,20 @@ def split_on_titles(file, main_title, options):
             # detect if-statements starting or ending on the current line
             in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len(re.findall(IF_MANGLED_PATTERNS[ENDIF], line))
 
+            # detect codeblocks to make sure titles aren't detected in them
+            if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
+                in_code_block = not in_code_block
+                if options[VERBOSE]:
+                    if in_code_block:
+                        print("Detected start of a codeblock, not registering titles")
+                    else:
+                        print("Detected end of codeblock, registering titles again")
+
             # only split up if current line is in a fully non-os-specific section
             if in_if_statement == 0:
 
                 title_level = check_for_title(line, in_code_block, curr_dirs, options)
 
-                # detect codeblocks to make sure titles aren't detected in them
-                if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
-                    in_code_block = not in_code_block
-
                 # line is a title with a maximum depth of 4
                 if title_level > 0:
                     if after_first_title:
@@ -318,8 +330,12 @@ def split_on_titles(file, main_title, options):
                         # write text of previous file
                         if previous_contained_if:
                             paragraphs_os_text[title] = current_paragraph
+                            if options[VERBOSE]:
+                                print("Saved os-specific chunk with temporary title: " + title + "\n")
                         else:
                             paragraphs_os_free_text[title] = current_paragraph
+                            if options[VERBOSE]:
+                                print("Saved generic chunk with title: " + title + "\n")
 
                         # write metadata of previous file
                         paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir, options[SOURCE_DIRECTORY] + '/' + main_title + '.md')
@@ -357,8 +373,12 @@ def split_on_titles(file, main_title, options):
     # write dictionaries for the last file
     if previous_contained_if:
         paragraphs_os_text[title] = current_paragraph
+        if options[VERBOSE]:
+            print("Saved os-specific chunk with temporary title: " + title + "\n")
     else:
         paragraphs_os_free_text[title] = current_paragraph
+        if options[VERBOSE]:
+            print("Saved generic chunk with title: " + title + "\n")
     paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, curr_dirs[last_title_level], options[SOURCE_DIRECTORY] + '/' + main_title + '.md')
 
     return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order
@@ -377,6 +397,10 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1,
     :return paragraphs_metadata: dictionary containing the metadata of each split section of text
     :return subtitle_order: list containing all encountered subtitles in order of appearance
     """
+
+    if options[VERBOSE]:
+        print("Splitting on paragraphs\n")
+
     # start of assuming we are not in a code_block
     in_code_block = False
 
@@ -435,23 +459,33 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1,
             # detect whether the current line is in a list
             if re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', line):  # beginning of a list entry
                 in_list = True
-                # print("List entry found")
+                if options[VERBOSE]:
+                    print("First line of new list entry found, not starting new paragraphs: " + line[:-1])
             elif re.search(r'^\s{2,}.+$', line) and in_list:  # middle of a list entry
                 pass
             elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list:  # line(s) between list entries
                 pass
+            elif in_list:
+                if options[VERBOSE]:
+                    print("List ended, starting new paragraphs again")
+                in_list = False
             else:
                 in_list = False
 
+            # detect codeblocks to make sure titles aren't detected in them
+            if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
+                in_code_block = not in_code_block
+                if options[VERBOSE]:
+                    if in_code_block:
+                        print("Detected start of a codeblock, not starting new paragraphs")
+                    else:
+                        print("Detected end of codeblock, starting new paragraphs again")
+
             # only split up if current line is in a fully non-os-specific section
             if in_if_statement == 0:
 
                 title_level = check_for_title(line, in_code_block, curr_dirs, options)
 
-                # detect codeblocks to make sure titles aren't detected in them
-                if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
-                    in_code_block = not in_code_block
-
                 # check whether a new paragraph should be started
                 if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= options[MIN_PARAGRAPH_LENGTH] and not in_code_block and not in_list:
 
@@ -465,8 +499,12 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1,
                     # write text of previous file
                     if previous_contained_if:
                         paragraphs_os_text[paragraph_title] = current_paragraph
+                        if options[VERBOSE]:
+                            print("Saved os-specific chunk with temporary title: " + paragraph_title + "\n")
                     else:
                         paragraphs_os_free_text[paragraph_title] = current_paragraph
+                        if options[VERBOSE]:
+                            print("Saved generic chunk with title: " + paragraph_title + "\n")
 
                     # write metadata of previous file
                     paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir, source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md')
@@ -512,8 +550,12 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1,
     # write dictionaries for the last file
     if previous_contained_if:
         paragraphs_os_text[paragraph_title] = current_paragraph
+        if options[VERBOSE]:
+            print("Saved os-specific chunk with temporary title: " + paragraph_title + "\n")
     else:
         paragraphs_os_free_text[paragraph_title] = current_paragraph
+        if options[VERBOSE]:
+            print("Saved generic chunk with title: " + paragraph_title + "\n")
     paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level], source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md')
     subtitle_order.append(paragraph_title)
 
@@ -557,6 +599,9 @@ def jinja_parser(filename, copy_location, options):
     # YAML file location
     yml_file_path = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, EXTRA_DIR, 'gent.yml')
 
+    if options[VERBOSE]:
+        print("Reading YAML file from location: " + yml_file_path)
+
     # Read the YAML file
     with open(yml_file_path, 'r') as yml_file:
         words_dict = yaml.safe_load(yml_file)
@@ -569,6 +614,9 @@ def jinja_parser(filename, copy_location, options):
     }
     combined_context = {**words_dict, **additional_context}
 
+    if options[VERBOSE]:
+        print("Mangling OS-specific if-statements")
+
     # Mangle the OS-related if-statements
     mangle_ifs(copy_location, filename, options)
 
@@ -578,6 +626,9 @@ def jinja_parser(filename, copy_location, options):
     template = templateEnv.get_template(filename)
     rendered_content = template.render(combined_context)
 
+    if options[VERBOSE]:
+        print("jinja parsing finished\nWriting to location: " + copy_location)
+
     # Save the rendered content to a new file
     with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file:
         output_file.write(rendered_content)
@@ -601,7 +652,7 @@ def load_macros(name):
             return readfile.read()
 
 
-def mangle_os_ifs(line, is_os):
+def mangle_os_ifs(line, is_os, options):
     """
     function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure.
     We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements.
@@ -612,6 +663,7 @@ def mangle_os_ifs(line, is_os):
         NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if
         OS_IF: in an os-if
         OS_IF_IN_OS_IF: in an os-if nested in an os-if
+    :param options: dictionary containing the options given by the user
     :return line: the modified line with  mangled os-related if-statements
     """
 
@@ -640,6 +692,8 @@ def mangle_os_ifs(line, is_os):
         # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these
         if endif_match:
             if is_os in (OS_IF, OS_IF_IN_OS_IF):
+                if options[VERBOSE]:
+                    print("OS-specific endif statement found in line: " + line[:-1])
                 line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling
                 added_length += 2 * len(IF_MANGLED_PART)
                 if is_os == OS_IF:
@@ -651,6 +705,8 @@ def mangle_os_ifs(line, is_os):
 
         elif if_match:
             if if_os_match:
+                if options[VERBOSE]:
+                    print("OS-specific if statement found in line:    " + line[:-1])
                 line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling
                 added_length += 2 * len(IF_MANGLED_PART)
                 if is_os == OS_IF:
@@ -665,6 +721,8 @@ def mangle_os_ifs(line, is_os):
 
         elif else_match:
             if is_os in (OS_IF, OS_IF_IN_OS_IF):
+                if options[VERBOSE]:
+                    print("OS-specific else statement found in line:  " + line[:-1])
                 line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling
                 added_length += 2 * len(IF_MANGLED_PART)
 
@@ -688,7 +746,7 @@ def mangle_ifs(directory, filename, options):
     with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES,  filename), 'w') as write_file:
         with open(directory, 'r') as read_file:
             for line in read_file:
-                new_line, is_os = mangle_os_ifs(line, is_os)
+                new_line, is_os = mangle_os_ifs(line, is_os, options)
                 write_file.write(new_line)
 
 
@@ -733,6 +791,9 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order,
         filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY])
         os.makedirs(filepath, exist_ok=True)
 
+        if options[VERBOSE]:
+            print("Writing generic section " + title + " to filepath: " + str(filepath))
+
         write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options, is_linux_tutorial)
     else:
         # don't write empty files
@@ -911,6 +972,9 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
                     filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY])
                     os.makedirs(filepath, exist_ok=True)
 
+                    if options[VERBOSE]:
+                        print("Writing os-specific section " + os_subtitle + " to filepath: " + str(filepath))
+
                     # write to files
                     write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options, is_linux_tutorial)
                 else:
@@ -933,10 +997,14 @@ def main(options):
                     MIN_PARAGRAPH_LENGTH: integer representing the minimum length of a paragraph,
                     MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text,
                     INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext,
-                    DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not}
+                    DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not,
+                    VERBOSE: boolean indicating whether verbose mode is enabled}
     :return:
     """
 
+    if options[VERBOSE]:
+        print("Running chatbot parser with options: " + str(options))
+
     if options[DEEP_DIRECTORIES] and options[VERBOSE]:
         print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
 
@@ -976,9 +1044,10 @@ def main(options):
 
         # variable that keeps track of the directories that are used to write in at different levels
         root_dir_generic = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR)
-        root_dir_os_specific_linux = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, LINUX)
-        root_dir_os_specific_windows = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS)
-        root_dir_os_specific_macos = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, MACOS)
+        root_dir_os_specific = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR)
+        root_dir_os_specific_linux = os.path.join(root_dir_os_specific, LINUX)
+        root_dir_os_specific_windows = os.path.join(root_dir_os_specific, WINDOWS)
+        root_dir_os_specific_macos = os.path.join(root_dir_os_specific, MACOS)
 
         # variable for the main title (needed for reference links)
         main_title = filename[:-3]
@@ -989,18 +1058,31 @@ def main(options):
         ################### actually parse the md file ###################
 
         if options[VERBOSE]:
-            print("Processing " + filename)
+            print(LINE + "Processing " + filename)
+            print("Location: " + filenames[filename])
+            print("\nMaking directories:")
 
         # create directories for the source markdown file
-        for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]:
+        for directory in [root_dir_generic, root_dir_os_specific, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]:
+            if options[VERBOSE]:
+                print(directory)
             os.makedirs(directory, exist_ok=True)
 
+        if options[VERBOSE]:
+            print("\nParsing the sourcefile with jinja")
+
         # process the jinja macros
         jinja_parser(filename, copy_file, options)
 
+        if options[VERBOSE]:
+            print("\nSplitting the file for the first time (split in sufficiently small generic sections and large os-specific chunks)")
+
         # split the text in paragraphs
         paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options)
 
+        if options[VERBOSE]:
+            print("\nFurther splitting os-specific chunks and writing generic and os-specific sections to files with metadata")
+
         # for every section, either make the whole section generic, or create an os-specific file for each OS
         for i, subtitle in enumerate(subtitle_order):
 
@@ -1012,6 +1094,14 @@ def main(options):
             else:
                 split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata, options, is_linux_tutorial)
 
+        if options[VERBOSE]:
+            print("\nFinished processing " + filename)
+
+    if options[VERBOSE]:
+        print(LINE + "Cleaning up directories:")
+        print(os.path.join(options[DESTINATION_DIRECTORY], COPIES))
+        print(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES))
+        print(os.path.join(options[DESTINATION_DIRECTORY], LINUX_TUTORIAL))
     # clean up temporary directories and files
     shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True)
     shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True)

From 3227f1939ef3933d0a8fcc22835239021abae0aa Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 29 Aug 2024 09:17:04 +0200
Subject: [PATCH 134/152] Added limitation on lists

---
 scripts/HPC_chatbot_preprocessor/README.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index b3bce665973..1795ee71554 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -164,6 +164,18 @@ Comments can be written in such a way that the script will keep them as input fo
 <!--INPUT_FOR_BOTyour comment for the bot-->
 ```
 
+This will be reworked to
+ 
+```
+your comment for the bot
+```
+
+in the final output.
+
 ### Long filepaths
 
 Due to the nature of this script, it can generate large directories with very long names if `deep_directories` is enabled. Depending on the operating system, this can cause problems with filepaths being to long, resulting in files not being able to open. A possible fix for this is to make sure the filepath to where the script is located is not too long. Another solution is lowering the `max_title_depth` or disabling `deep_directories`.
+
+### Markdown lists
+
+The parser is designed to detect lists and not split them across multiple paragraphs. It can detect all lists with the markers `-`, `+` and `*`, as well as lists indexed with numbers or letters (one letter per list entry). It can handle list entries that are spread out over multiple lines if there is an indentation of at least two spaces. It can also handle list entries consisting of multiple paragraphs in this way, as long as the indentation stays.

From 67aed53662656f95c7a9b718cf372d1ca5349283 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 29 Aug 2024 10:46:56 +0200
Subject: [PATCH 135/152] fix for non os-specific if-statement not being
 recognised

---
 scripts/HPC_chatbot_preprocessor/README.md    | 11 +++++
 .../chatbot_parser.py                         | 34 +++++++++++++-
 .../generic/account/account_paragraph_10.txt  |  8 ++--
 .../account_paragraph_10_metadata.json        |  5 +-
 .../account_paragraph_12_metadata.json        |  1 +
 .../account/account_paragraph_1_metadata.json |  1 +
 .../account/account_paragraph_2_metadata.json |  1 +
 .../account/account_paragraph_3_metadata.json |  1 +
 .../generic/account/account_paragraph_8.txt   | 11 +++--
 .../account/account_paragraph_8_metadata.json |  1 +
 .../connecting/connecting_paragraph_10.txt    | 24 ++++++++++
 .../connecting_paragraph_10_metadata.json}    |  7 +--
 .../connecting/connecting_paragraph_14.txt    |  7 ---
 .../connecting_paragraph_14_metadata.json     | 14 ------
 .../connecting/connecting_paragraph_15.txt    | 19 +++-----
 .../connecting_paragraph_15_metadata.json     |  6 +--
 .../connecting/connecting_paragraph_16.txt    | 11 +++++
 .../connecting_paragraph_16_metadata.json     | 16 +++++++
 .../connecting_paragraph_1_metadata.json      |  1 +
 .../connecting/connecting_paragraph_2.txt     |  2 +-
 .../connecting_paragraph_2_metadata.json      |  1 +
 .../connecting/connecting_paragraph_3.txt     |  1 -
 .../connecting_paragraph_3_metadata.json      |  1 +
 .../connecting/connecting_paragraph_6.txt     |  2 -
 .../connecting_paragraph_6_metadata.json      |  1 +
 .../connecting/connecting_paragraph_7.txt     |  1 -
 .../connecting_paragraph_7_metadata.json      |  1 +
 .../connecting/connecting_paragraph_8.txt     |  3 +-
 .../connecting_paragraph_8_metadata.json      |  1 +
 .../connecting/connecting_paragraph_9.txt     | 46 ++++++++-----------
 .../connecting_paragraph_9_metadata.json      |  1 +
 .../account/account_linux_paragraph_11.1.txt  |  3 ++
 ...account_linux_paragraph_11.1_metadata.json |  1 +
 .../account_linux_paragraph_4.1_metadata.json |  1 +
 .../account_linux_paragraph_5.1_metadata.json |  1 +
 .../account_linux_paragraph_5.2_metadata.json |  1 +
 .../account_linux_paragraph_5.3_metadata.json |  1 +
 .../account_linux_paragraph_5.4_metadata.json |  1 +
 .../account_linux_paragraph_5.5_metadata.json |  1 +
 .../account_linux_paragraph_6.1_metadata.json |  1 +
 .../account_linux_paragraph_7.1_metadata.json |  1 +
 .../account_linux_paragraph_7.2_metadata.json |  1 +
 .../account/account_linux_paragraph_9.1.txt   |  9 ++--
 .../account_linux_paragraph_9.1_metadata.json |  1 +
 .../connecting_linux_paragraph_10.1.txt       | 33 -------------
 ...necting_linux_paragraph_10.1_metadata.json | 11 -----
 .../connecting_linux_paragraph_11.1.txt       | 43 ++++++++++++++---
 ...necting_linux_paragraph_11.1_metadata.json |  5 +-
 .../connecting_linux_paragraph_12.1.txt       | 18 +++-----
 ...necting_linux_paragraph_12.1_metadata.json | 14 +++---
 .../connecting_linux_paragraph_13.1.txt       | 20 ++++----
 ...necting_linux_paragraph_13.1_metadata.json | 10 ++--
 ...xt => connecting_linux_paragraph_13.2.txt} |  0
 ...ecting_linux_paragraph_13.2_metadata.json} |  5 +-
 ...xt => connecting_linux_paragraph_13.3.txt} |  0
 ...ecting_linux_paragraph_13.3_metadata.json} |  5 +-
 ...xt => connecting_linux_paragraph_13.4.txt} |  0
 ...ecting_linux_paragraph_13.4_metadata.json} |  5 +-
 ...xt => connecting_linux_paragraph_13.5.txt} |  0
 ...ecting_linux_paragraph_13.5_metadata.json} |  5 +-
 ...xt => connecting_linux_paragraph_13.6.txt} |  0
 ...ecting_linux_paragraph_13.6_metadata.json} |  5 +-
 .../connecting_linux_paragraph_14.1.txt       | 10 ++++
 ...necting_linux_paragraph_14.1_metadata.json | 12 +++++
 ...nnecting_linux_paragraph_5.1_metadata.json |  1 +
 ...nnecting_linux_paragraph_5.2_metadata.json |  1 +
 .../account/account_macos_paragraph_11.1.txt  |  3 ++
 ...account_macos_paragraph_11.1_metadata.json |  1 +
 .../account_macos_paragraph_4.1_metadata.json |  1 +
 .../account_macos_paragraph_5.1_metadata.json |  1 +
 .../account_macos_paragraph_5.2_metadata.json |  1 +
 .../account_macos_paragraph_5.3_metadata.json |  1 +
 .../account_macos_paragraph_5.4_metadata.json |  1 +
 .../account_macos_paragraph_5.5_metadata.json |  1 +
 .../account_macos_paragraph_6.1_metadata.json |  1 +
 .../account_macos_paragraph_7.1_metadata.json |  1 +
 .../account_macos_paragraph_7.2_metadata.json |  1 +
 .../account/account_macos_paragraph_9.1.txt   |  9 ++--
 .../account_macos_paragraph_9.1_metadata.json |  1 +
 .../connecting_macos_paragraph_10.1.txt       | 33 -------------
 .../connecting_macos_paragraph_11.1.txt       | 43 ++++++++++++++---
 ...necting_macos_paragraph_11.1_metadata.json |  5 +-
 .../connecting_macos_paragraph_12.1.txt       | 18 +++-----
 ...necting_macos_paragraph_12.1_metadata.json | 14 +++---
 .../connecting_macos_paragraph_12.2.txt       | 17 -------
 ...necting_macos_paragraph_12.2_metadata.json | 11 -----
 .../connecting_macos_paragraph_13.1.txt       | 25 +++++-----
 ...necting_macos_paragraph_13.1_metadata.json |  8 +++-
 .../connecting_macos_paragraph_13.2.txt       | 20 ++++++--
 ...necting_macos_paragraph_13.2_metadata.json |  7 +--
 ...xt => connecting_macos_paragraph_13.3.txt} |  0
 ...ecting_macos_paragraph_13.3_metadata.json} |  5 +-
 ...xt => connecting_macos_paragraph_13.4.txt} |  0
 ...ecting_macos_paragraph_13.4_metadata.json} |  5 +-
 ...xt => connecting_macos_paragraph_13.5.txt} |  0
 ...ecting_macos_paragraph_13.5_metadata.json} |  5 +-
 ...xt => connecting_macos_paragraph_13.6.txt} |  0
 ...ecting_macos_paragraph_13.6_metadata.json} |  5 +-
 .../connecting_macos_paragraph_14.1.txt       | 15 ++++++
 ...necting_macos_paragraph_14.1_metadata.json | 12 +++++
 .../connecting_macos_paragraph_14.2.txt       |  3 ++
 ...necting_macos_paragraph_14.2_metadata.json | 12 +++++
 ...nnecting_macos_paragraph_5.1_metadata.json |  1 +
 ...nnecting_macos_paragraph_5.2_metadata.json |  1 +
 .../account_windows_paragraph_11.1.txt        |  3 ++
 ...count_windows_paragraph_11.1_metadata.json |  1 +
 ...ccount_windows_paragraph_4.1_metadata.json |  1 +
 ...ccount_windows_paragraph_4.2_metadata.json |  1 +
 .../account/account_windows_paragraph_4.3.txt |  8 ++++
 ...ccount_windows_paragraph_4.3_metadata.json |  4 ++
 ...ccount_windows_paragraph_4.4_metadata.json |  1 +
 ...ccount_windows_paragraph_6.1_metadata.json |  1 +
 ...ccount_windows_paragraph_6.2_metadata.json |  1 +
 ...ccount_windows_paragraph_6.3_metadata.json |  1 +
 .../account/account_windows_paragraph_9.1.txt |  9 ++--
 ...ccount_windows_paragraph_9.1_metadata.json |  1 +
 .../connecting_windows_paragraph_10.1.txt     |  5 --
 ...cting_windows_paragraph_10.1_metadata.json | 11 -----
 .../connecting_windows_paragraph_11.1.txt     | 29 ++++--------
 ...cting_windows_paragraph_11.1_metadata.json | 11 +++--
 .../connecting_windows_paragraph_12.1.txt     | 22 +++++++++
 ...ting_windows_paragraph_12.1_metadata.json} |  5 +-
 ... => connecting_windows_paragraph_12.2.txt} |  0
 ...cting_windows_paragraph_12.2_metadata.json | 12 +++++
 ... => connecting_windows_paragraph_12.3.txt} |  0
 ...ting_windows_paragraph_12.3_metadata.json} |  5 +-
 .../connecting_windows_paragraph_4.1.txt      |  1 +
 ...ecting_windows_paragraph_4.1_metadata.json |  1 +
 128 files changed, 533 insertions(+), 355 deletions(-)
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/{os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json => generic/connecting/connecting_paragraph_10_metadata.json} (53%)
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.2.txt => connecting_linux_paragraph_13.2.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.2_metadata.json => connecting_linux_paragraph_13.2_metadata.json} (61%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.3.txt => connecting_linux_paragraph_13.3.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.4_metadata.json => connecting_linux_paragraph_13.3_metadata.json} (61%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.4.txt => connecting_linux_paragraph_13.4.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.3_metadata.json => connecting_linux_paragraph_13.4_metadata.json} (61%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.5.txt => connecting_linux_paragraph_13.5.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.5_metadata.json => connecting_linux_paragraph_13.5_metadata.json} (61%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.6.txt => connecting_linux_paragraph_13.6.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.6_metadata.json => connecting_linux_paragraph_13.6_metadata.json} (65%)
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.3.txt => connecting_macos_paragraph_13.3.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.3_metadata.json => connecting_macos_paragraph_13.3_metadata.json} (61%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.4.txt => connecting_macos_paragraph_13.4.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.4_metadata.json => connecting_macos_paragraph_13.4_metadata.json} (61%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.5.txt => connecting_macos_paragraph_13.5.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.5_metadata.json => connecting_macos_paragraph_13.5_metadata.json} (61%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.6.txt => connecting_macos_paragraph_13.6.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.6_metadata.json => connecting_macos_paragraph_13.6_metadata.json} (65%)
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/{connecting_windows_paragraph_11.2_metadata.json => connecting_windows_paragraph_12.1_metadata.json} (63%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/{connecting_windows_paragraph_11.2.txt => connecting_windows_paragraph_12.2.txt} (100%)
 create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/{connecting_windows_paragraph_11.3.txt => connecting_windows_paragraph_12.3.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/{connecting_windows_paragraph_11.3_metadata.json => connecting_windows_paragraph_12.3_metadata.json} (63%)

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index 1795ee71554..27c1bf3fea6 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -145,6 +145,17 @@ endif
 
 This will also result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it.
 
+### Non OS-related if-statements
+
+Due to the way jinja parses the sourcefiles, the script slightly alters non os-specific if-statements as well. It expects if-statements of the following form:
+
+```
+{%- if site == gent %}
+{% if site != (gent or brussel) %}
+```
+
+All spaces and the dash are optional. City names don't need to be fully lowercase since the parser will capitalize them properly anyway.
+
 ### html syntax
 
 The input shouldn't contain any html syntax. While some failsafes are in place, the script isn't made with the use case of handling html syntax in mind. 
diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 60776fcb379..3129ccaf566 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -620,6 +620,12 @@ def jinja_parser(filename, copy_location, options):
     # Mangle the OS-related if-statements
     mangle_ifs(copy_location, filename, options)
 
+    if options[VERBOSE]:
+        print("Altering other if-statements to parse properly")
+
+    # Alter the other if-statements
+    alter_ifs(filename, options)
+
     # Use Jinja2 to replace the macros
     template_loader = ChoiceLoader([FileSystemLoader(searchpath=[os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), options[SOURCE_DIRECTORY], os.path.join(options[SOURCE_DIRECTORY], RETURN_DIR)]), FunctionLoader(load_macros)])
     templateEnv = Environment(loader=template_loader)
@@ -627,7 +633,7 @@ def jinja_parser(filename, copy_location, options):
     rendered_content = template.render(combined_context)
 
     if options[VERBOSE]:
-        print("jinja parsing finished\nWriting to location: " + copy_location)
+        print("jinja parsing finished\nWriting jinja-parsed file to location: " + copy_location)
 
     # Save the rendered content to a new file
     with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file:
@@ -750,6 +756,32 @@ def mangle_ifs(directory, filename, options):
                 write_file.write(new_line)
 
 
+def alter_ifs(filename, options):
+    """
+    Function that further adapts the if-statements in a file and writes it to a location where the jinja parser will use it.
+    This is because the jinja parser doesn't seem to be able to handle statements like {% if site == gent %} with context {'site': 'Gent'} in this case.
+    These statements get changed to {% if site == 'Gent' %} in this function.
+
+    :param filename: the filename of the file to be transformed
+    :param options: dictionary containing the options given by the user
+    :return:
+    """
+
+    with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES,  filename), 'r') as read_file:
+        content = read_file.read()
+
+    pattern = r'(\{%-?\s?[a-zA-Z\s]*?[!=]=\s?\(?)([a-zA-Z\s]+(?:\sor\s[a-zA-Z\s]+)*)(\)?\s?%})'
+    content = re.sub(pattern,
+                     lambda match: (f"{match.group(1)}" +
+                                    " or ".join([f"'{city.strip().capitalize()}'" for city in match.group(2).split(" or ")]) +
+                                    f"{match.group(3)}"
+                                    ),
+                     content)
+
+    with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES,  filename), 'w') as write_file:
+        write_file.write(content)
+
+
 def make_valid_title(title):
     """
     function that makes sure all titles can be used as valid filenames
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt
index f486b9b1348..7b0a39279e4 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt
@@ -1,3 +1,7 @@
+After you have uploaded your public key you will receive an e-mail with
+a link to confirm your e-mail address. After confirming your e-mail
+address the VSC staff will review and if applicable approve your
+account.
 Welcome e-mail
 Within one day, you should receive a Welcome e-mail with your VSC
 account details.
@@ -12,7 +16,3 @@ Kind regards,
 -- The VSC administrators
 Now, you can start using the HPC. You can always look up your VSC id later
 by visiting <https://account.vscentrum.be>.
-Adding multiple SSH public keys (optional)
-In case you are connecting from different computers to the login nodes,
-it is advised to use separate SSH public keys to do so. You should
-follow these steps.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json
index 4b5b5202d1c..e417029c16f 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json
@@ -1,11 +1,12 @@
 {
     "main_title": "account",
-    "subtitle": "Adding-multiple-SSH-public-keys-(optional)",
+    "subtitle": "Welcome-e-mail",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "",
     "previous_title": "account_paragraph_9",
     "next_title": "account_paragraph_11",
     "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/account/#adding-multiple-ssh-public-keys-optional"
+    "reference_link": "https://docs.hpc.ugent.be/account/#welcome-e-mail"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json
index a5df035df49..e43e729aa74 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Computation-Workflow-on-the-HPC",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 2,
     "directory": "account",
     "parent_title": "",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json
index 726ce9f94fa..cdba091d7df 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Getting-ready-to-request-an-account",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 2,
     "directory": "account",
     "links": {
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json
index 257f886c6e0..0b22e2986a0 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Getting-ready-to-request-an-account",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 2,
     "directory": "account",
     "links": {
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json
index b94f233779b..bd2f73195a6 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "How-do-SSH-keys-work",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt
index 125b566419a..6c5695dfff3 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt
@@ -6,8 +6,9 @@ Select "UGent" in the dropdown box and optionally select "Save my preference"
 and "permanently".
 Click "Confirm"
 You will now be taken to the authentication page of your institute.
-After you log in using your UGent login and password, you will be asked to
-upload the file that contains your public key, i.e., the file
-"id_rsa.pub" which you have generated earlier. Make sure that your
-public key is actually accepted for upload, because if it is in a wrong
-format, wrong type or too short, then it will be refused.
+You will now have to log in with CAS using your UGent account.
+You either have a login name of maximum 8 characters, or a (non-UGent)
+email address if you are an external user. In case of problems with your
+UGent password, please visit: <https://password.ugent.be/>. After
+logging in, you may be requested to share your information. Click "Yes,
+continue".
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json
index 6d186b6ff46..6a77c48dbd1 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Applying-for-the-account",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 2,
     "directory": "account",
     "parent_title": "",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt
new file mode 100644
index 00000000000..5c715d218a1
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt
@@ -0,0 +1,24 @@
+You can exit the connection at any time by entering:
+$ exit
+logout
+Connection to login.hpc.ugent.be closed.
+ tip "tip: Setting your Language right"
+    You may encounter a warning message similar to the following one during connecting:
+    perl: warning: Setting locale failed.
+    perl: warning: Please check that your locale settings:
+    LANGUAGE = (unset),
+    LC_ALL = (unset),
+    LC_CTYPE = "UTF-8",
+    LANG = (unset)
+        are supported and installed on your system.
+    perl: warning: Falling back to the standard locale ("C").
+    or any other error message complaining about the locale.
+    This means that the correct "locale" has not yet been properly specified on your local machine. Try:
+    LANG=
+    LC_COLLATE="C"
+    LC_CTYPE="UTF-8"
+    LC_MESSAGES="C"
+    LC_MONETARY="C"
+    LC_NUMERIC="C"
+    LC_TIME="C"
+    LC_ALL=
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json
similarity index 53%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json
index 4c6e5477119..96a1f9cee80 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json
@@ -1,11 +1,12 @@
 {
     "main_title": "connecting",
     "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 2,
     "directory": "connecting",
-    "parent_title": "Connecting-to-the-HPC-infrastructure",
+    "parent_title": "",
     "previous_title": "connecting_paragraph_9",
     "next_title": "connecting_paragraph_11",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure"
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt
deleted file mode 100644
index df00d4ed2a4..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-Fast file transfer for large datasets
-See the section on rsync in chapter 5 of the Linux intro manual.
-Changing login nodes
-It can be useful to have control over which login node you are on. However, when you connect to the HPC (High-Performance Computing) system, you are directed to a random login node, which might not be the one where you already have an active session. To address this, there is a way to manually switch your active login node.
-For instance, if you want to switch to the login node named gligar07.gastly.os, you can use the following command while you are connected to the gligar08.gastly.os login node on the HPC:
-ssh gligar07.gastly.os
-This is also possible the other way around.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json
deleted file mode 100644
index 0543efa4083..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json
+++ /dev/null
@@ -1,14 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Changing-login-nodes",
-    "title_depth": 2,
-    "directory": "connecting",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/connecting/../linux-tutorial/uploading_files/#copying-faster-with-rsync"
-    },
-    "parent_title": "",
-    "previous_title": "connecting_paragraph_13",
-    "next_title": "connecting_paragraph_15",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt
index b2197618647..df00d4ed2a4 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt
@@ -1,12 +1,7 @@
-If you want to find out which login host you are connected to, you can use the hostname command.
-$ hostname
-gligar07.gastly.os
-$ ssh gligar08.gastly.os
-$ hostname
-gligar08.gastly.os
-
-Rather than always starting a new session on the HPC, you can also use a terminal multiplexer like screen or tmux.
-These can make sessions that 'survives' across disconnects.
-You can find more information on how to use these tools here (or on other online sources):
-- screen
-- tmux
\ No newline at end of file
+Fast file transfer for large datasets
+See the section on rsync in chapter 5 of the Linux intro manual.
+Changing login nodes
+It can be useful to have control over which login node you are on. However, when you connect to the HPC (High-Performance Computing) system, you are directed to a random login node, which might not be the one where you already have an active session. To address this, there is a way to manually switch your active login node.
+For instance, if you want to switch to the login node named gligar07.gastly.os, you can use the following command while you are connected to the gligar08.gastly.os login node on the HPC:
+ssh gligar07.gastly.os
+This is also possible the other way around.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json
index d23146ed79f..ff9c22397d1 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json
@@ -1,15 +1,15 @@
 {
     "main_title": "connecting",
     "subtitle": "Changing-login-nodes",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 2,
     "directory": "connecting",
     "links": {
-        "0": "https://www.howtogeek.com/662422/how-to-use-linuxs-screen-command/",
-        "1": "https://www.howtogeek.com/671422/how-to-use-tmux-on-linux-and-why-its-better-than-screen/"
+        "0": "https://docs.hpc.ugent.be/connecting/../linux-tutorial/uploading_files/#copying-faster-with-rsync"
     },
     "parent_title": "",
     "previous_title": "connecting_paragraph_14",
-    "next_title": null,
+    "next_title": "connecting_paragraph_16",
     "OS": "generic",
     "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt
new file mode 100644
index 00000000000..dd4f3269fb5
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt
@@ -0,0 +1,11 @@
+If you want to find out which login host you are connected to, you can use the hostname command.
+$ hostname
+gligar07.gastly.os
+$ ssh gligar08.gastly.os
+$ hostname
+gligar08.gastly.os
+Rather than always starting a new session on the HPC, you can also use a terminal multiplexer like screen or tmux.
+These can make sessions that 'survive' across disconnects.
+You can find more information on how to use these tools here (or on other online sources):
+- screen
+- tmux
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json
new file mode 100644
index 00000000000..623be877f5b
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json
@@ -0,0 +1,16 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Changing-login-nodes",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
+    "title_depth": 2,
+    "directory": "connecting",
+    "links": {
+        "0": "https://www.howtogeek.com/662422/how-to-use-linuxs-screen-command/",
+        "1": "https://www.howtogeek.com/671422/how-to-use-tmux-on-linux-and-why-its-better-than-screen/"
+    },
+    "parent_title": "",
+    "previous_title": "connecting_paragraph_15",
+    "next_title": null,
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json
index ef0bc5473b0..783e60c1ab5 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "connecting",
     "subtitle": "Connecting-to-the-HPC-infrastructure",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 1,
     "directory": "connecting",
     "links": {
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt
index b150c8fbb28..49c4572f3b2 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt
@@ -7,7 +7,7 @@ networks, and from (most) Belgian commercial internet providers.
 All other IP domains are blocked by default. If you are connecting from
 an IP address that is not allowed direct access, you have the following
 options to get access to VSC login nodes:
--   Use an VPN connection to connect to UGent the network (recommended).
+-   Use a VPN connection to connect to the UGent network (recommended). See <https://helpdesk.ugent.be/vpn/en/> for more information.
 -   Whitelist your IP address automatically by accessing
     <https://firewall.vscentrum.be> and log in with your UGent account.
     -   While this web connection is active new SSH sessions can be
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json
index 39ee53fcf0b..10f3e042d9a 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "connecting",
     "subtitle": "Connection-restrictions",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 2,
     "directory": "connecting",
     "parent_title": "",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt
index 31dd6463266..db490973b7f 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt
@@ -2,7 +2,6 @@ Trying to establish an SSH connection from an IP address that does not
 adhere to these restrictions will result in an immediate failure to
 connect, with an error message like:
 ssh_exchange_identification: read: Connection reset by peer
-
 First Time connection to the HPC infrastructure
 The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH.
 If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json
index 471e6bfcbf2..e30467d0799 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "connecting",
     "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 2,
     "directory": "connecting",
     "links": {
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt
index 472991adada..862e6952252 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt
@@ -2,14 +2,12 @@ Congratulations, you're on the HPC infrastructure now!
 To find out where you have landed you can print the current working directory:
 $ pwd
 /user/home/gent/vsc400/vsc40000
-
 Your new private home directory is "/user/home/gent/vsc400/vsc40000". Here you can create your own
 subdirectory structure, copy and prepare your applications, compile and
 test them and submit your jobs on the HPC.
 $ cd /apps/gent/tutorials
 $ ls
 Intro-HPC/
-
 This directory currently contains all training material for the Introduction to the HPC. More
 relevant training material to work with the HPC can always be added later in
 this directory.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json
index 1c7ae8ed267..66b2a89fbb1 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "connecting",
     "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 2,
     "directory": "connecting",
     "parent_title": "",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt
index 35996afe4da..aa590b9b269 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt
@@ -15,7 +15,6 @@ $ tree -L 2
     |-- example.pbs
     '-- example.sh
 9 directories, 5 files
-
 This directory contains:
 1.  This HPC Tutorial (in either a Mac, Linux or      Windows version).
 2.  An examples subdirectory, containing all the examples that you need in this
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json
index 709753e4dc4..6e3f90fbe8a 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "connecting",
     "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 2,
     "directory": "connecting",
     "parent_title": "",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt
index 096c74c1372..634df6034b1 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt
@@ -1,5 +1,4 @@
-$ cd examples
-
+cd examples
  tip
     Typing cd ex followed by tab (the Tab-key) will generate the cd examples
     command. Command-line completion (also tab completion) is a common feature of the bash command
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json
index 0241e0bd6b9..074e7e891ce 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "connecting",
     "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 2,
     "directory": "connecting",
     "links": {
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt
index 5a634e6bddc..ad2fee7457f 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt
@@ -1,27 +1,19 @@
-$ cp -r /apps/gent/tutorials/Intro-HPC/examples ~/
-
-You can exit the connection at anytime by entering:
-$ exit
-logout
-Connection to login.hpc.ugent.be closed.
-
- tip "tip: Setting your Language right"
-    You may encounter a warning message similar to the following one during connecting:
-    perl: warning: Setting locale failed.
-    perl: warning: Please check that your locale settings:
-    LANGUAGE = (unset),
-    LC_ALL = (unset),
-    LC_CTYPE = "UTF-8",
-    LANG = (unset)
-        are supported and installed on your system.
-    perl: warning: Falling back to the standard locale ("C").
-    or any other error message complaining about the locale.
-    This means that the correct "locale" has not yet been properly specified on your local machine. Try:
-    LANG=
-    LC_COLLATE="C"
-    LC_CTYPE="UTF-8"
-    LC_MESSAGES="C"
-    LC_MONETARY="C"
-    LC_NUMERIC="C"
-    LC_TIME="C"
-    LC_ALL=
+cp -r /apps/gent/tutorials/Intro-HPC/examples ~/
+Go to your home directory, check your own private examples directory, ... and start working.
+cd
+ls -l
+Upon connecting you will see a login message containing your last login time stamp and a basic overview of the current cluster utilisation.
+Last login: Thu Mar 18 13:15:09 2021 from gligarha02.gastly.os
+ STEVIN HPC-UGent infrastructure status on Mon, 19 Feb 2024 10:00:01
+      cluster         - full - free -  part - total - running - queued
+                        nodes  nodes   free   nodes   jobs      jobs
+ -------------------------------------------------------------------------
+           skitty          39      0     26      68      1839     5588
+           joltik           6      0      1      10        29       18
+            doduo          22      0     75     128      1397    11933
+         accelgor           4      3      2       9        18        1
+          donphan           0      0     16      16        16       13
+          gallade           2      0      5      16        19      136
+For a full view of the current loads and queues see:
+https://hpc.ugent.be/clusterstate/
+Updates on current system status and planned maintenance can be found on https://www.ugent.be/hpc/en/infrastructure/status
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json
index 40b04f24e9f..bd1d462e614 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "connecting",
     "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 2,
     "directory": "connecting",
     "parent_title": "",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt
index b2734cc9f89..dfc59211792 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt
@@ -1,4 +1,7 @@
 Adding multiple SSH public keys (optional)
+In case you are connecting from different computers to the login nodes,
+it is advised to use separate SSH public keys to do so. You should
+follow these steps.
 1.  Create a new public/private SSH key pair from the new computer.
     Repeat the process described in
     section Generate a public/private key pair with OpenSSH.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json
index 72b9f92061c..ffdeaf550e0 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Adding-multiple-SSH-public-keys-(optional)",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Applying-for-the-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json
index 52e1569a8a7..bcc0552177d 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "How-do-SSH-keys-work",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json
index 4636f13a4b4..7654a65253a 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Test-OpenSSH",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json
index ca9c4c7dc1d..32f1120307f 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Test-OpenSSH",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json
index d902f6a0838..722ba1a2ad4 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json
index 1edae26d97b..4f65f6ebf36 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json
index 29affc0335e..468fb5d0938 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json
index acf12bc0a7d..fb82c40a7d7 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Using-an-SSH-agent-(optional",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json
index b6b1e052345..4214d6cb321 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Using-an-SSH-agent-(optional)",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json
index 35466be5b56..de9700c7a5b 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Using-an-SSH-agent-(optional)",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt
index a9059b224bf..815c414e059 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt
@@ -1,6 +1,7 @@
 Applying for the account
+After you log in using your UGent login and password, you will be asked to
+upload the file that contains your public key, i.e., the file
+"id_rsa.pub" which you have generated earlier. Make sure that your
+public key is actually accepted for upload, because if it is in a wrong
+format, wrong type or too short, then it will be refused.
 This file has been stored in the directory "~/.ssh/".
-After you have uploaded your public key you will receive an e-mail with
-a link to confirm your e-mail address. After confirming your e-mail
-address the VSC staff will review and if applicable approve your
-account.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json
index 21988388723..31c14d853b3 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Applying-for-the-account",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 2,
     "directory": "account",
     "parent_title": "account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt
deleted file mode 100644
index 4c8894438c9..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-First Time connection to the HPC infrastructure
-    A locale is a set of parameters that defines the user's language, country and
-    any special variant preferences that the user wants to see in their user
-    interface. Usually a locale identifier consists of at least a language
-    identifier and a region identifier.
-    Open the .bashrc on your local machine with your favourite editor and
-    add the following lines:
-    
-    $ nano ~/.bashrc
-    ...
-    export LANGUAGE="en_US.UTF-8"
-    export LC_ALL="en_US.UTF-8"
-    export LC_CTYPE="en_US.UTF-8"
-    export LANG="en_US.UTF-8"
-    ...
-    
-     tip "tip: vi"
-        To start entering text in vi: move to the place you want to start
-        entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq"
-        To exit vi without saving your changes, enter ""ESC":q!"
-    
-    
-    or alternatively (if you are not comfortable with the Linux editors),
-    again on your local machine:
-    
-    echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile
-    echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile
-    echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile
-    echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile
-    
-    You can now log out, open a new terminal/shell on your local machine and
-    reconnect to the login node, and you should not get these warnings anymore.
-    
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json
deleted file mode 100644
index 364c81834cf..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
-    "title_depth": 2,
-    "directory": "connecting",
-    "parent_title": "Connecting-to-the-HPC-infrastructure",
-    "previous_title": "connecting_paragraph_9",
-    "next_title": "connecting_paragraph_11",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt
index d872c89a0f8..1d912924535 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt
@@ -1,6 +1,37 @@
-Transfer Files to/from the HPC
-Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back.
-The preferred way to transfer files is by using an scp or sftp via the
-secure OpenSSH protocol.  ships with an implementation of OpenSSH, so you
-don't need to install any third-party software to use it. Just open a
-terminal window and jump in!
+First Time connection to the HPC infrastructure
+    A locale is a set of parameters that defines the user's language, country and
+    any special variant preferences that the user wants to see in their user
+    interface. Usually a locale identifier consists of at least a language
+    identifier and a region identifier.
+     Note
+        If you try to set a non-supported locale, then it will be automatically
+        set to the default. Currently the default is en_US.UFT-8 or en_US,
+        depending on whether your originally (non-supported) locale was UTF-8 or not.
+    Open the .bashrc on your local machine with your favourite editor and
+    add the following lines:
+    
+    $ nano ~/.bashrc
+    ...
+    export LANGUAGE="en_US.UTF-8"
+    export LC_ALL="en_US.UTF-8"
+    export LC_CTYPE="en_US.UTF-8"
+    export LANG="en_US.UTF-8"
+    ...
+    
+     tip "tip: vi"
+        To start entering text in vi: move to the place you want to start
+        entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq"
+        To exit vi without saving your changes, enter ""ESC":q!"
+    
+    
+    or alternatively (if you are not comfortable with the Linux editors),
+    again on your local machine:
+    
+    echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile
+    echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile
+    echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile
+    echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile
+    
+    You can now log out, open a new terminal/shell on your local machine and
+    reconnect to the login node, and you should not get these warnings anymore.
+    
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json
index 420f73742f5..ef14b084e5f 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json
@@ -1,11 +1,12 @@
 {
     "main_title": "connecting",
-    "subtitle": "Transfer-Files-tofrom-the-HPC",
+    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 2,
     "directory": "connecting",
     "parent_title": "Connecting-to-the-HPC-infrastructure",
     "previous_title": "connecting_paragraph_10",
     "next_title": "connecting_paragraph_12",
     "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#transfer-files-tofrom-the-hpc"
+    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt
index 8d0031fcca9..d872c89a0f8 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt
@@ -1,12 +1,6 @@
-Transfer Files tofrom the HPC
-Using scp
-Secure copy or SCP is a tool (command) for securely transferring files between a local
-host (= your computer) and a remote host (the HPC). It is based on the
-Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e.,
-copy) command, but can copy files to or from remote machines.
-It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if
-you have symlinks to them in your home directory. See 
-the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux
- for how to do this.
-Open an additional terminal window and check that you're working on your
-local machine.
+Transfer Files to/from the HPC
+Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back.
+The preferred way to transfer files is by using an scp or sftp via the
+secure OpenSSH protocol.  ships with an implementation of OpenSSH, so you
+don't need to install any third-party software to use it. Just open a
+terminal window and jump in!
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json
index 19eba778d90..081156a5d16 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json
@@ -1,14 +1,12 @@
 {
     "main_title": "connecting",
-    "subtitle": "Using-scp",
-    "title_depth": 3,
+    "subtitle": "Transfer-Files-tofrom-the-HPC",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
+    "title_depth": 2,
     "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch"
-    },
+    "parent_title": "Connecting-to-the-HPC-infrastructure",
     "previous_title": "connecting_paragraph_11",
-    "next_title": "connecting_linux_paragraph_12.2",
+    "next_title": "connecting_paragraph_13",
     "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp"
+    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#transfer-files-tofrom-the-hpc"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt
index a0496edfb14..8d0031fcca9 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt
@@ -1,10 +1,12 @@
 Transfer Files tofrom the HPC
-Using a GUI
-If you prefer a GUI to transfer files back and forth to the HPC, you can
-use your file browser. Open your file browser and press
-++"Ctrl"+"l"++
-This should open up a address bar where you can enter a URL.
-Alternatively, look for the "connect to server" option in your file
-browsers menu.
-Enter: sftp://vsc40000@login.hpc.ugent.be/ and press enter.
-You should now be able to browse files on the HPC in your file browser.
+Using scp
+Secure copy or SCP is a tool (command) for securely transferring files between a local
+host (= your computer) and a remote host (the HPC). It is based on the
+Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e.,
+copy) command, but can copy files to or from remote machines.
+It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if
+you have symlinks to them in your home directory. See 
+the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux
+ for how to do this.
+Open an additional terminal window and check that you're working on your
+local machine.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json
index d634a356654..988c10028d8 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json
@@ -1,11 +1,15 @@
 {
     "main_title": "connecting",
-    "subtitle": "Using-a-GUI",
+    "subtitle": "Using-scp",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch"
+    },
     "previous_title": "connecting_paragraph_12",
-    "next_title": "connecting_paragraph_14",
+    "next_title": "connecting_linux_paragraph_13.2",
     "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-a-gui"
+    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json
similarity index 61%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json
index 0b3a3418c55..43affa4e36c 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json
@@ -1,11 +1,12 @@
 {
     "main_title": "connecting",
     "subtitle": "Using-scp",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_linux_paragraph_12.1",
-    "next_title": "connecting_linux_paragraph_12.3",
+    "previous_title": "connecting_linux_paragraph_13.1",
+    "next_title": "connecting_linux_paragraph_13.3",
     "OS": "linux",
     "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json
similarity index 61%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json
index 5a401911cab..ccc74bb5b94 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json
@@ -1,11 +1,12 @@
 {
     "main_title": "connecting",
     "subtitle": "Using-scp",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_linux_paragraph_12.3",
-    "next_title": "connecting_linux_paragraph_12.5",
+    "previous_title": "connecting_linux_paragraph_13.2",
+    "next_title": "connecting_linux_paragraph_13.4",
     "OS": "linux",
     "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json
similarity index 61%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json
index 5624749ede8..9ffcc4121f4 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json
@@ -1,11 +1,12 @@
 {
     "main_title": "connecting",
     "subtitle": "Using-scp",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_linux_paragraph_12.2",
-    "next_title": "connecting_linux_paragraph_12.4",
+    "previous_title": "connecting_linux_paragraph_13.3",
+    "next_title": "connecting_linux_paragraph_13.5",
     "OS": "linux",
     "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json
similarity index 61%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json
index a479f66e7e0..8e3b4056b6b 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json
@@ -1,11 +1,12 @@
 {
     "main_title": "connecting",
     "subtitle": "Using-sftp",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_linux_paragraph_12.4",
-    "next_title": "connecting_linux_paragraph_12.6",
+    "previous_title": "connecting_linux_paragraph_13.4",
+    "next_title": "connecting_linux_paragraph_13.6",
     "OS": "linux",
     "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json
similarity index 65%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json
index 9c744fd5133..1fc868ffab2 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "connecting",
     "subtitle": "Using-sftp",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "Transfer-Files-tofrom-the-HPC",
@@ -8,8 +9,8 @@
         "0": "",
         "1": ""
     },
-    "previous_title": "connecting_linux_paragraph_12.5",
-    "next_title": "connecting_linux_paragraph_12.7",
+    "previous_title": "connecting_linux_paragraph_13.5",
+    "next_title": "connecting_linux_paragraph_13.7",
     "OS": "linux",
     "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt
new file mode 100644
index 00000000000..a0496edfb14
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt
@@ -0,0 +1,10 @@
+Transfer Files tofrom the HPC
+Using a GUI
+If you prefer a GUI to transfer files back and forth to the HPC, you can
+use your file browser. Open your file browser and press
+++"Ctrl"+"l"++
+This should open up a address bar where you can enter a URL.
+Alternatively, look for the "connect to server" option in your file
+browsers menu.
+Enter: sftp://vsc40000@login.hpc.ugent.be/ and press enter.
+You should now be able to browse files on the HPC in your file browser.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json
new file mode 100644
index 00000000000..e3c48fe4829
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json
@@ -0,0 +1,12 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-a-GUI",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "previous_title": "connecting_paragraph_13",
+    "next_title": "connecting_paragraph_15",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-a-gui"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json
index 05996eb5df2..55613bca732 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "connecting",
     "subtitle": "Connect",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json
index 85a826e41a3..21b63518804 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "connecting",
     "subtitle": "Connect",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt
index b2734cc9f89..dfc59211792 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt
@@ -1,4 +1,7 @@
 Adding multiple SSH public keys (optional)
+In case you are connecting from different computers to the login nodes,
+it is advised to use separate SSH public keys to do so. You should
+follow these steps.
 1.  Create a new public/private SSH key pair from the new computer.
     Repeat the process described in
     section Generate a public/private key pair with OpenSSH.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json
index dd8b3400419..d9d3c33f876 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Adding-multiple-SSH-public-keys-(optional)",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Applying-for-the-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json
index 33d083958b9..5400014a85c 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "How-do-SSH-keys-work",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json
index c75d6aede58..028d9d25f7f 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Test-OpenSSH",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json
index 7f6c80a32f6..dfec6f6fd5a 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Test-OpenSSH",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json
index 7c0f0d2a04d..5a10e780b45 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json
index 346108200ac..8da465c1f24 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json
index 25baa1e073f..9d6f7b1a741 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json
index b8931a423d3..17a34a2f80b 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Using-an-SSH-agent-(optional",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json
index c43391b146e..f9b6c751fd4 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Using-an-SSH-agent-(optional)",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json
index 519b58bb151..072a43cb3e4 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Using-an-SSH-agent-(optional)",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt
index d11380c2519..5a5a52da062 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt
@@ -1,11 +1,12 @@
 Applying for the account
+After you log in using your UGent login and password, you will be asked to
+upload the file that contains your public key, i.e., the file
+"id_rsa.pub" which you have generated earlier. Make sure that your
+public key is actually accepted for upload, because if it is in a wrong
+format, wrong type or too short, then it will be refused.
 This file has been stored in the directory "~/.ssh/".
  tip
     As ".ssh" is an invisible directory, the Finder will not show it by
     default. The easiest way to access the folder, is by pressing ++cmd+shift+g++ (or ++cmd+shift+"."++),
     which will allow you to enter the name of a directory, which you would
     like to open in Finder. Here, type "~/.ssh" and press enter.
-After you have uploaded your public key you will receive an e-mail with
-a link to confirm your e-mail address. After confirming your e-mail
-address the VSC staff will review and if applicable approve your
-account.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json
index 6b6e8c72703..86c8c2048bf 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Applying-for-the-account",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 2,
     "directory": "account",
     "parent_title": "account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt
deleted file mode 100644
index 4c8894438c9..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-First Time connection to the HPC infrastructure
-    A locale is a set of parameters that defines the user's language, country and
-    any special variant preferences that the user wants to see in their user
-    interface. Usually a locale identifier consists of at least a language
-    identifier and a region identifier.
-    Open the .bashrc on your local machine with your favourite editor and
-    add the following lines:
-    
-    $ nano ~/.bashrc
-    ...
-    export LANGUAGE="en_US.UTF-8"
-    export LC_ALL="en_US.UTF-8"
-    export LC_CTYPE="en_US.UTF-8"
-    export LANG="en_US.UTF-8"
-    ...
-    
-     tip "tip: vi"
-        To start entering text in vi: move to the place you want to start
-        entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq"
-        To exit vi without saving your changes, enter ""ESC":q!"
-    
-    
-    or alternatively (if you are not comfortable with the Linux editors),
-    again on your local machine:
-    
-    echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile
-    echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile
-    echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile
-    echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile
-    
-    You can now log out, open a new terminal/shell on your local machine and
-    reconnect to the login node, and you should not get these warnings anymore.
-    
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt
index d872c89a0f8..1d912924535 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt
@@ -1,6 +1,37 @@
-Transfer Files to/from the HPC
-Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back.
-The preferred way to transfer files is by using an scp or sftp via the
-secure OpenSSH protocol.  ships with an implementation of OpenSSH, so you
-don't need to install any third-party software to use it. Just open a
-terminal window and jump in!
+First Time connection to the HPC infrastructure
+    A locale is a set of parameters that defines the user's language, country and
+    any special variant preferences that the user wants to see in their user
+    interface. Usually a locale identifier consists of at least a language
+    identifier and a region identifier.
+     Note
+        If you try to set a non-supported locale, then it will be automatically
+        set to the default. Currently the default is en_US.UTF-8 or en_US,
+        depending on whether your original (non-supported) locale was UTF-8 or not.
+    Open the .bashrc on your local machine with your favourite editor and
+    add the following lines:
+    
+    $ nano ~/.bashrc
+    ...
+    export LANGUAGE="en_US.UTF-8"
+    export LC_ALL="en_US.UTF-8"
+    export LC_CTYPE="en_US.UTF-8"
+    export LANG="en_US.UTF-8"
+    ...
+    
+     tip "tip: vi"
+        To start entering text in vi: move to the place you want to start
+        entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: "ESC" followed by ":wq"
+        To exit vi without saving your changes, enter "ESC" followed by ":q!"
+    
+    
+    or alternatively (if you are not comfortable with the Linux editors),
+    again on your local machine:
+    
+    echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile
+    echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile
+    echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile
+    echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile
+    
+    You can now log out, open a new terminal/shell on your local machine and
+    reconnect to the login node, and you should not get these warnings anymore.
+    
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json
index 1425455ade8..323292b910e 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json
@@ -1,11 +1,12 @@
 {
     "main_title": "connecting",
-    "subtitle": "Transfer-Files-tofrom-the-HPC",
+    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 2,
     "directory": "connecting",
     "parent_title": "Connecting-to-the-HPC-infrastructure",
     "previous_title": "connecting_paragraph_10",
     "next_title": "connecting_paragraph_12",
     "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#transfer-files-tofrom-the-hpc"
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt
index 8d0031fcca9..d872c89a0f8 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt
@@ -1,12 +1,6 @@
-Transfer Files tofrom the HPC
-Using scp
-Secure copy or SCP is a tool (command) for securely transferring files between a local
-host (= your computer) and a remote host (the HPC). It is based on the
-Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e.,
-copy) command, but can copy files to or from remote machines.
-It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if
-you have symlinks to them in your home directory. See 
-the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux
- for how to do this.
-Open an additional terminal window and check that you're working on your
-local machine.
+Transfer Files to/from the HPC
+Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back.
+The preferred way to transfer files is by using an scp or sftp via the
+secure OpenSSH protocol. macOS ships with an implementation of OpenSSH, so you
+don't need to install any third-party software to use it. Just open a
+terminal window and jump in!
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json
index 332e6ed2996..8a420f36c2b 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json
@@ -1,14 +1,12 @@
 {
     "main_title": "connecting",
-    "subtitle": "Using-scp",
-    "title_depth": 3,
+    "subtitle": "Transfer-Files-tofrom-the-HPC",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
+    "title_depth": 2,
     "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch"
-    },
+    "parent_title": "Connecting-to-the-HPC-infrastructure",
     "previous_title": "connecting_paragraph_11",
-    "next_title": "connecting_macos_paragraph_12.2",
+    "next_title": "connecting_paragraph_13",
     "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp"
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#transfer-files-tofrom-the-hpc"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt
deleted file mode 100644
index f1da0677a67..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-$ hostname
-<local-machine-name>
-If you're still using the terminal that is connected to the HPC, close the
-connection by typing "exit" in the terminal window.
-For example, we will copy the (local) file "localfile.txt" to your
-home directory on the HPC cluster. We first generate a small dummy
-"localfile.txt", which contains the word "Hello". Use your own VSC
-account, which is something like "vsc40000". Don't forget the colon (:) at the
-end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your
-local filesystem. You can even specify where to save the file on the
-remote filesystem by putting a path after the colon.
-$ echo "Hello" > localfile.txt
-$ ls -l 
-...
--rw-r--r-- 1 user  staff   6 Sep 18 09:37 localfile.txt
-$ scp localfile.txt vsc40000@login.hpc.ugent.be:
-localfile.txt     100%   6     0.0KB/s     00:00
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json
deleted file mode 100644
index d86cdd989ac..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Using-scp",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_macos_paragraph_12.1",
-    "next_title": "connecting_macos_paragraph_12.3",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt
index 20a4acb40a8..8d0031fcca9 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt
@@ -1,15 +1,12 @@
 Transfer Files tofrom the HPC
-Using a GUI (Cyberduck)
-Cyberduck is a graphical alternative to the scp command. It can be
-installed from <https://cyberduck.io>.
-This is the one-time setup you will need to do before connecting:
-1.  After starting Cyberduck, the Bookmark tab will show up. To add a
-    new bookmark, click on the "+" sign on the bottom left of the
-    window. A new window will open.
-2.  In the drop-down menu on top, select "SFTP (SSH File Transfer Protocol)".
-3.  In the "Server" field, type in login.hpc.ugent.be. In the "Username" field, type in
-    your VSC account id (this looks like vsc40000).
-4.  Select the location of your SSH private key in the "SSH Private Key" field.
-5.  Finally, type in a name for the bookmark in the "Nickname" field and
-    close the window by pressing on the red circle in the top left
-    corner of the window.
+Using scp
+Secure copy or SCP is a tool (command) for securely transferring files between a local
+host (= your computer) and a remote host (the HPC). It is based on the
+Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e.,
+copy) command, but can copy files to or from remote machines.
+It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if
+you have symlinks to them in your home directory. See 
+the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux
+ for how to do this.
+Open an additional terminal window and check that you're working on your
+local machine.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json
index bd02ed8502f..9ec843ff0aa 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json
@@ -1,11 +1,15 @@
 {
     "main_title": "connecting",
-    "subtitle": "Using-a-GUI-(Cyberduck)",
+    "subtitle": "Using-scp",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch"
+    },
     "previous_title": "connecting_paragraph_12",
     "next_title": "connecting_macos_paragraph_13.2",
     "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck"
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt
index 1d20edf411f..f1da0677a67 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt
@@ -1,3 +1,17 @@
-To open the connection, click on the "Bookmarks" icon (which
-resembles an open book) and double-click on the bookmark you just
-created.
+$ hostname
+<local-machine-name>
+If you're still using the terminal that is connected to the HPC, close the
+connection by typing "exit" in the terminal window.
+For example, we will copy the (local) file "localfile.txt" to your
+home directory on the HPC cluster. We first generate a small dummy
+"localfile.txt", which contains the word "Hello". Use your own VSC
+account, which is something like "vsc40000". Don't forget the colon (:) at the
+end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your
+local filesystem. You can even specify where to save the file on the
+remote filesystem by putting a path after the colon.
+$ echo "Hello" > localfile.txt
+$ ls -l 
+...
+-rw-r--r-- 1 user  staff   6 Sep 18 09:37 localfile.txt
+$ scp localfile.txt vsc40000@login.hpc.ugent.be:
+localfile.txt     100%   6     0.0KB/s     00:00
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json
index 344ff690d54..dc57de365bf 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json
@@ -1,11 +1,12 @@
 {
     "main_title": "connecting",
-    "subtitle": "Using-a-GUI-(Cyberduck)",
+    "subtitle": "Using-scp",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "Transfer-Files-tofrom-the-HPC",
     "previous_title": "connecting_macos_paragraph_13.1",
-    "next_title": "connecting_paragraph_14",
+    "next_title": "connecting_macos_paragraph_13.3",
     "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck"
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json
similarity index 61%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json
index 4fcc42d2337..5a4623c650d 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json
@@ -1,11 +1,12 @@
 {
     "main_title": "connecting",
     "subtitle": "Using-scp",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_macos_paragraph_12.2",
-    "next_title": "connecting_macos_paragraph_12.4",
+    "previous_title": "connecting_macos_paragraph_13.2",
+    "next_title": "connecting_macos_paragraph_13.4",
     "OS": "macos",
     "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json
similarity index 61%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json
index 757b533cf8d..54b3fe19d58 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json
@@ -1,11 +1,12 @@
 {
     "main_title": "connecting",
     "subtitle": "Using-scp",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_macos_paragraph_12.3",
-    "next_title": "connecting_macos_paragraph_12.5",
+    "previous_title": "connecting_macos_paragraph_13.3",
+    "next_title": "connecting_macos_paragraph_13.5",
     "OS": "macos",
     "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json
similarity index 61%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json
index d18c7c7deb5..0b9ba08e3b1 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json
@@ -1,11 +1,12 @@
 {
     "main_title": "connecting",
     "subtitle": "Using-sftp",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_macos_paragraph_12.4",
-    "next_title": "connecting_macos_paragraph_12.6",
+    "previous_title": "connecting_macos_paragraph_13.4",
+    "next_title": "connecting_macos_paragraph_13.6",
     "OS": "macos",
     "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json
similarity index 65%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json
index a8a4f2a3bab..fe899ad9dbc 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "connecting",
     "subtitle": "Using-sftp",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "Transfer-Files-tofrom-the-HPC",
@@ -8,8 +9,8 @@
         "0": "",
         "1": ""
     },
-    "previous_title": "connecting_macos_paragraph_12.5",
-    "next_title": "connecting_macos_paragraph_12.7",
+    "previous_title": "connecting_macos_paragraph_13.5",
+    "next_title": "connecting_macos_paragraph_13.7",
     "OS": "macos",
     "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt
new file mode 100644
index 00000000000..20a4acb40a8
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt
@@ -0,0 +1,15 @@
+Transfer Files to/from the HPC
+Using a GUI (Cyberduck)
+Cyberduck is a graphical alternative to the scp command. It can be
+installed from <https://cyberduck.io>.
+This is the one-time setup you will need to do before connecting:
+1.  After starting Cyberduck, the Bookmark tab will show up. To add a
+    new bookmark, click on the "+" sign on the bottom left of the
+    window. A new window will open.
+2.  In the drop-down menu on top, select "SFTP (SSH File Transfer Protocol)".
+3.  In the "Server" field, type in login.hpc.ugent.be. In the "Username" field, type in
+    your VSC account id (this looks like vsc40000).
+4.  Select the location of your SSH private key in the "SSH Private Key" field.
+5.  Finally, type in a name for the bookmark in the "Nickname" field and
+    close the window by pressing on the red circle in the top left
+    corner of the window.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json
new file mode 100644
index 00000000000..694b7682aa9
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json
@@ -0,0 +1,12 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-a-GUI-(Cyberduck)",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "previous_title": "connecting_paragraph_13",
+    "next_title": "connecting_macos_paragraph_14.2",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt
new file mode 100644
index 00000000000..1d20edf411f
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt
@@ -0,0 +1,3 @@
+To open the connection, click on the "Bookmarks" icon (which
+resembles an open book) and double-click on the bookmark you just
+created.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json
new file mode 100644
index 00000000000..e32b1ab4c58
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json
@@ -0,0 +1,12 @@
+{
+    "main_title": "connecting",
+    "subtitle": "Using-a-GUI-(Cyberduck)",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "Transfer-Files-tofrom-the-HPC",
+    "previous_title": "connecting_macos_paragraph_14.1",
+    "next_title": "connecting_paragraph_15",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json
index e17629a55f3..85b088b0e8c 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "connecting",
     "subtitle": "Connect",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json
index 5c1d808739c..047d5863361 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "connecting",
     "subtitle": "Connect",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt
index 0863009f290..ca00a8a0f65 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt
@@ -1,4 +1,7 @@
 Adding multiple SSH public keys (optional)
+In case you are connecting from different computers to the login nodes,
+it is advised to use separate SSH public keys to do so. You should
+follow these steps.
 1.  Create a new public/private SSH key pair from Putty. Repeat the
     process described in
     section Generate a public/private key pair.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json
index eb4dd3b3a57..4614c053f2c 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Adding-multiple-SSH-public-keys-(optional)",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Applying-for-the-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
index ce74735c538..7dc9b50fbdd 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Get-PuTTY-A-free-telnetSSH-client",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json
index 9616b41452a..773acaabf23 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Generating-a-publicprivate-key-pair",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt
index de5d164bb7a..b082d381a64 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt
@@ -27,3 +27,11 @@ Start PuTTYgen.exe it and follow these steps:
     "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh") with the
     buttons "Save public key" and "Save private key". We recommend using the name "id_rsa.pub" for the public key, and
     "id_rsa.ppk" for the private key.
+6.  Finally, save an "OpenSSH" version of your private key (in
+    particular for later "X2Go" usage, see x2go) by entering the
+    "Conversions" menu and selecting "Export OpenSSH key" (do not select the
+    "force new file format" variant). Save the file in the same location
+    as in the previous step with filename "id_rsa". (If there is no
+    "Conversions" menu, you must update your "puttygen" version. If you
+    want to do this conversion afterwards, you can start with loading an
+    existing "id_rsa.ppk" and only do this conversion export.)
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json
index 06b6e998c08..d803aeadb25 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json
@@ -1,9 +1,13 @@
 {
     "main_title": "account",
     "subtitle": "Generating-a-publicprivate-key-pair",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
+    "links": {
+        "0": ""
+    },
     "previous_title": "account_windows_paragraph_4.2",
     "next_title": "account_windows_paragraph_4.4",
     "OS": "windows",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json
index fba810e7299..ebd55060657 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Generating-a-publicprivate-key-pair",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json
index 69771b48c86..5fd697066b6 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Using-an-SSH-agent-(optional)",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json
index 24670708070..46808447a10 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Using-an-SSH-agent-(optional)",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json
index d47ad3bd215..e33d002d248 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Using-an-SSH-agent-(optional)",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt
index 90c17263cf5..9fd23612756 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt
@@ -1,7 +1,8 @@
 Applying for the account
+After you log in using your UGent login and password, you will be asked to
+upload the file that contains your public key, i.e., the file
+"id_rsa.pub" which you have generated earlier. Make sure that your
+public key is actually accepted for upload, because if it is in a wrong
+format, wrong type or too short, then it will be refused.
 This file should have been stored in the directory
 "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh"
-After you have uploaded your public key you will receive an e-mail with
-a link to confirm your e-mail address. After confirming your e-mail
-address the VSC staff will review and if applicable approve your
-account.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json
index d01ac9c3c16..87cda41283f 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "account",
     "subtitle": "Applying-for-the-account",
+    "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 2,
     "directory": "account",
     "parent_title": "account",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt
deleted file mode 100644
index aaf5a585ebd..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-First Time connection to the HPC infrastructure
-    A locale is a set of parameters that defines the user's language, country and
-    any special variant preferences that the user wants to see in their user
-    interface. Usually a locale identifier consists of at least a language
-    identifier and a region identifier.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json
deleted file mode 100644
index 45c2bd2d90e..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
-    "title_depth": 2,
-    "directory": "connecting",
-    "parent_title": "Connecting-to-the-HPC-infrastructure",
-    "previous_title": "connecting_paragraph_9",
-    "next_title": "connecting_paragraph_11",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#first-time-connection-to-the-hpc-infrastructure"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt
index a4f00ba7a5f..5aa8ca03374 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt
@@ -1,20 +1,9 @@
-Transfer Files to/from the HPC
-Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back.
-WinSCP
-To transfer files to and from the cluster, we recommend the use of
-WinSCP, a graphical file management tool which can transfer files using
-secure protocols such as SFTP and SCP. WinSCP is freely available from
-<http://www.winscp.net>.
-To transfer your files using WinSCP,
-1.  Open the program
-2.  The "Login" menu is shown automatically (if it is closed, click "New Session" to open it again). Fill in the necessary fields under "Session"
-    1.  Click "New Site".
-    2.  Enter "login.hpc.ugent.be" in the "Host name" field.
-    3.  Enter your "vsc-account" in the "User name" field.
-    4.  Select "SCP" as the "file" protocol.
-    5.  Note that the password field remains empty.
-    6.  Click "Advanced...".
-    7.  Click "SSH > Authentication".
-    8.  Select your private key in the field "Private key file".
-3.  Press the "Save" button, to save the session under "Session > Sites" for future access.
-4.  Finally, when clicking on "Login", you will be asked for your key passphrase.
+First Time connection to the HPC infrastructure
+    A locale is a set of parameters that defines the user's language, country and
+    any special variant preferences that the user wants to see in their user
+    interface. Usually a locale identifier consists of at least a language
+    identifier and a region identifier.
+     Note
+        If you try to set a non-supported locale, then it will be automatically
+        set to the default. Currently the default is en_US.UTF-8 or en_US,
+        depending on whether your original (non-supported) locale was UTF-8 or not.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json
index d9fbc64790a..d4b02dbc9fb 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json
@@ -1,11 +1,12 @@
 {
     "main_title": "connecting",
-    "subtitle": "WinSCP",
-    "title_depth": 3,
+    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
+    "title_depth": 2,
     "directory": "connecting",
-    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
+    "parent_title": "Connecting-to-the-HPC-infrastructure",
     "previous_title": "connecting_paragraph_10",
-    "next_title": "connecting_windows_paragraph_11.2",
+    "next_title": "connecting_paragraph_12",
     "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp"
+    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#first-time-connection-to-the-hpc-infrastructure"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt
new file mode 100644
index 00000000000..67e5e454852
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt
@@ -0,0 +1,22 @@
+Transfer Files to/from the HPC
+Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back.
+WinSCP
+To transfer files to and from the cluster, we recommend the use of
+WinSCP, a graphical file management tool which can transfer files using
+secure protocols such as SFTP and SCP. WinSCP is freely available from
+<http://www.winscp.net>.
+To transfer your files using WinSCP,
+1.  Open the program
+2.  The "Login" menu is shown automatically (if it is closed, click "New Session" to open it again). Fill in the necessary fields under "Session"
+    1.  Click "New Site".
+    2.  Enter "login.hpc.ugent.be" in the "Host name" field.
+    3.  Enter your "vsc-account" in the "User name" field.
+    4.  Select "SCP" as the "file" protocol.
+    5.  Note that the password field remains empty.
+    
+    6.  Click "Advanced...".
+    7.  Click "SSH > Authentication".
+    8.  Select your private key in the field "Private key file".
+3.  Press the "Save" button, to save the session under "Session > Sites" for future access.
+4.  Finally, when clicking on "Login", you will be asked for your key passphrase.
+    
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json
similarity index 63%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json
index 65055dc0764..a4bbaee0f59 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json
@@ -1,11 +1,12 @@
 {
     "main_title": "connecting",
     "subtitle": "WinSCP",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
-    "previous_title": "connecting_windows_paragraph_11.1",
-    "next_title": "connecting_windows_paragraph_11.3",
+    "previous_title": "connecting_paragraph_11",
+    "next_title": "connecting_windows_paragraph_12.2",
     "OS": "windows",
     "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json
new file mode 100644
index 00000000000..80a8ef763a1
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json
@@ -0,0 +1,12 @@
+{
+    "main_title": "connecting",
+    "subtitle": "WinSCP",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
+    "title_depth": 3,
+    "directory": "connecting",
+    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
+    "previous_title": "connecting_windows_paragraph_12.1",
+    "next_title": "connecting_windows_paragraph_12.3",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json
similarity index 63%
rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json
index dd628f8e8cd..07760730d56 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json
@@ -1,11 +1,12 @@
 {
     "main_title": "connecting",
     "subtitle": "WinSCP",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
-    "previous_title": "connecting_windows_paragraph_11.2",
-    "next_title": "connecting_paragraph_12",
+    "previous_title": "connecting_windows_paragraph_12.2",
+    "next_title": "connecting_paragraph_13",
     "OS": "windows",
     "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt
index 69db57957dc..e45f4e63b85 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt
@@ -4,6 +4,7 @@ You've generated a public/private key pair with PuTTYgen and have an
 approved account on the VSC clusters. The next step is to setup the
 connection to (one of) the HPC.
 In the screenshots, we show the setup for user
+"vsc20167"
 to the HPC cluster via the login node "login.hpc.ugent.be".
 1.  Start the PuTTY executable putty.exe in your directory
     C:\Program Files (x86)\PuTTY and the configuration screen will pop
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json
index ef4de8bd8e4..8b6b6f698d1 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json
@@ -1,6 +1,7 @@
 {
     "main_title": "connecting",
     "subtitle": "Open-a-Terminal",
+    "source_file": "../../mkdocs/docs/HPC/connecting.md",
     "title_depth": 3,
     "directory": "connecting",
     "parent_title": "First-Time-connection-to-the-HPC-infrastructure",

From 9e297b18ef9827a20a1283053ad49c3e081044e7 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 29 Aug 2024 15:29:52 +0200
Subject: [PATCH 136/152] new test for links

---
 scripts/HPC_chatbot_preprocessor/README.md    |  6 +-
 .../chatbot_parser.py                         | 89 ++++++++++++++-----
 .../account/account_paragraph_1_metadata.json |  2 +-
 .../connecting_paragraph_15_metadata.json     |  2 +-
 .../connecting_paragraph_3_metadata.json      |  2 +-
 .../connecting_paragraph_8_metadata.json      |  2 +-
 .../account_linux_paragraph_4.1_metadata.json |  2 +-
 .../account_linux_paragraph_7.1_metadata.json |  2 +-
 ...necting_linux_paragraph_13.1_metadata.json |  2 +-
 ...necting_linux_paragraph_13.6_metadata.json |  4 +-
 ...nnecting_linux_paragraph_5.1_metadata.json |  2 +-
 .../account_macos_paragraph_4.1_metadata.json |  2 +-
 .../account_macos_paragraph_7.1_metadata.json |  2 +-
 ...ccount_windows_paragraph_4.1_metadata.json |  2 +-
 ...ccount_windows_paragraph_4.2_metadata.json |  4 +-
 ...ccount_windows_paragraph_4.3_metadata.json |  2 +-
 ...ccount_windows_paragraph_6.2_metadata.json |  4 +-
 ...ecting_windows_paragraph_4.1_metadata.json |  2 +-
 .../tests/test_links.py                       | 69 ++++++++++++++
 19 files changed, 158 insertions(+), 44 deletions(-)
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_links.py

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index 27c1bf3fea6..96a99498451 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -172,7 +172,7 @@ Any comments within the markdown files (for example TODO's) should follow the fo
 Comments can be written in such a way that the script will keep them as input for the bot. To do that, the marker `INPUT_FOR_BOT` should be put in front of the content of the comment as such.
 
 ```
-<!--INPUT_FOR_BOTyour comment for the bot-->
+<!--INPUT_FOR_BOT: your comment for the bot-->
 ```
 
 This will be reworked to
@@ -190,3 +190,7 @@ Due to the nature of this script, it can generate large directories with very lo
 ### Markdown lists
 
 The parser is made in a way to detect lists and not split them in multiple paragraphs. The kinds of lists it can detect is all lists with denominators `-`, `+`, `*` and list indexed with numbers or letters (one letter per list entry). It can handle  list entries being spread out over multiple lines if there is an indentation of at least two spaces. It can also handle multiple paragraph list entries in this way, as long as the indentation stays.
+
+### Links
+
+The metadata produced by the parser includes links. For these links to be built correctly, links to external sites must always start with either `https://` or `http://` (`mailto:` links are also left untouched); any other link is treated as a link within the documentation.
diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 3129ccaf566..9aa7dc972e5 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -104,7 +104,7 @@
 METADATA_EXTENSION = "_metadata"
 
 # Marker for comments for the bot
-INPUT_FOR_BOT = "INPUT_FOR_BOT"
+INPUT_FOR_BOT = "INPUT_FOR_BOT: "
 
 # Standard strings for verbose output
 LINE = "------------------------------------------------------------------------------------------------------\n"
@@ -138,7 +138,46 @@ def check_for_title(line, in_code_block, curr_dirs, options):
         return 0
 
 
-def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
+def make_valid_link(link, main_title, is_linux_tutorial):
+    """
+    Function that converts a string to a valid link to be used in the metadata
+
+    External links (http://, https:// and mailto:) are returned unchanged; every other link is made absolute.
+
+    :param link: the input string to be turned into a valid link
+    :param main_title: the main title of the file that contains the link
+    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
+    :return link: the valid link
+    """
+
+    # external links are already valid, rewriting them (e.g. dropping "/index") would break them
+    if link.startswith(('http://', 'https://', 'mailto:')):
+        return link
+
+    # ugly fix for problem with links: links between linux tutorial files need the tutorial directory
+    linux_tutorial_files = ["beyond_the_basics", "common_pitfalls", "getting_started", "hpc_infrastructure", "index", "manipulating_files_and_directories", "navigating", "uploading_files"]
+    in_tutorial = is_linux_tutorial and any(name in link for name in linux_tutorial_files)
+    base_url = DOCS_URL + '/' + (LINUX_TUTORIAL + '/' if in_tutorial else "")
+
+    # strip only the leading relative markers, "./" or "../" further down the path is left alone
+    while link.startswith(("./", "../")):
+        link = link.split("/", 1)[1]
+
+    if link.startswith("#"):
+        # anchor inside the current page
+        link = base_url + main_title + "/" + link
+    elif link.endswith(".md") and ("/" not in link or "." not in link.split("/")[0]):
+        # only drop the extension itself, not every ".md" in the path
+        link = base_url + link[:-len(".md")]
+    elif '.md#' in link:
+        link = base_url + link.replace(".md#", "/#")
+    else:
+        link = base_url + link
+
+    return link.replace('index/', '').replace('/index', '')
+
+
+def replace_markdown_markers(curr_line, linklist, in_code_block, main_title, is_linux_tutorial):
     """
     function that replaces certain markdown structures with the equivalent used on the website
 
@@ -146,12 +185,13 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
     :param linklist: the list used to store links that need to be printed at the end of the file
     :param in_code_block: boolean indicating whether the current line is part of a code block
     :param main_title: the main title of the file that is being processed
+    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
     :return curr_line: the adapted current line
     :return linklist: the updated linklist
     """
 
     # replace images with an empty line
-    if re.search(r'(?i)!\[image]\(.*?\)', curr_line) or re.search(r'!\[]\(img/.*?.png\)', curr_line):
+    if re.search(r'(?i)!\[image]\(.*?\)', curr_line) or re.search(r'!\[.*?]\(img/.*?\.png\)', curr_line):
         curr_line = ""
 
     # replace links with a reference
@@ -159,13 +199,8 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
     if matches:
         for match in matches:
             curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_MARKER + str(len(linklist)) + LINK_MARKER)
-            if ".md" not in match[1]:
-                if "#" not in match[1]:
-                    linklist.append(match[1])
-                else:
-                    linklist.append(DOCS_URL + "/" + main_title.replace(".md", "") + "/" + match[1])
-            else:
-                linklist.append(DOCS_URL + "/" + match[1].replace(".md", "/").replace("index", "").rstrip("/"))
+
+            linklist.append(make_valid_link(match[1], main_title, is_linux_tutorial))
 
     # codeblock (with ``` -> always stands on a separate line, so line can be dropped)
     if '```' in curr_line:
@@ -238,13 +273,14 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title):
     return curr_line, linklist
 
 
-def split_text(file, main_title, options, current_paragraph_number=-1, OS=GENERIC):
+def split_text(file, main_title, options, is_linux_tutorial, current_paragraph_number=-1, OS=GENERIC):
     """
     Function that splits the text into smaller sections and makes them into two dictionaries containing text and metadata
 
     :param file: the filepath of the file to be split
     :param main_title: the main title of the file
     :param options: dictionary containing the options given by the user
+    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
     :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph on paragraph level
     :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph on paragraph level
     :return paragraphs_text: dictionary containing the split sections of text
@@ -253,18 +289,19 @@ def split_text(file, main_title, options, current_paragraph_number=-1, OS=GENERI
     """
 
     if options[SPLIT_ON_TITLES]:
-        return split_on_titles(file, main_title, options)
+        return split_on_titles(file, main_title, options, is_linux_tutorial)
     elif options[SPLIT_ON_PARAGRAPHS]:
-        return split_on_paragraphs(file, main_title, options, current_paragraph_number, OS)
+        return split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_paragraph_number, OS)
 
 
-def split_on_titles(file, main_title, options):
+def split_on_titles(file, main_title, options, is_linux_tutorial):
     """
     Function that splits the text into smaller sections based on the subtitle structure and makes them into two dictionaries containing text and metadata
 
     :param file: the filepath of the file to be split
     :param main_title: the main title of the file
     :param options: dictionary containing the options given by the user
+    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
     :return paragraphs_text: dictionary containing the split sections of text
     :return paragraphs_metadata: dictionary containing the metadata of each split section of text
     :return subtitle_order: list containing all encountered subtitles in order of appearance
@@ -356,7 +393,7 @@ def split_on_titles(file, main_title, options):
 
                 # line is not a title
                 elif after_first_title:
-                    line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title)
+                    line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial)
                     if line != "\n":
                         current_paragraph += line
 
@@ -366,7 +403,7 @@ def split_on_titles(file, main_title, options):
                     last_dir = curr_dirs[last_title_level]
             else:
                 previous_contained_if = True
-                line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title)
+                line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial)
                 if line != "\n":
                     current_paragraph += line
 
@@ -384,13 +421,14 @@ def split_on_titles(file, main_title, options):
     return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order
 
 
-def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, OS=GENERIC):
+def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_paragraph_number=-1, OS=GENERIC):
     """
     Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata
 
     :param file: the filepath of the file to be split
     :param main_title: the main title of the file
     :param options: dictionary containing the options given by the user
+    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
     :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph
     :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph
     :return paragraphs_text: dictionary containing the split sections of text
@@ -524,12 +562,12 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1,
                     # make a new title
                     metadata_title = make_valid_title(line[title_level + 1:-1])
 
-                    line, link_list = replace_markdown_markers(line[title_level + 1:], link_list, in_code_block, main_title)
+                    line, link_list = replace_markdown_markers(line[title_level + 1:], link_list, in_code_block, main_title, is_linux_tutorial)
                     current_paragraph += line
 
                 # line is not a title or the beginning of a new paragraph
                 elif line != "\n" or previous_contained_if:
-                    line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title)
+                    line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial)
                     current_paragraph += line
 
                 # keep track of title level and directory to write to metadata upon discovering a new subtitle
@@ -538,7 +576,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1,
                     last_dir = curr_dirs[last_title_level]
             else:
                 previous_contained_if = True
-                line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title)
+                line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial)
                 current_paragraph += line
 
     # create a title for the last paragraph
@@ -799,7 +837,7 @@ def make_valid_title(title):
     valid_filename = re.sub(invalid_chars, '', title)
 
     # Strip leading/trailing whitespace
-    valid_filename = valid_filename.strip().strip('-').replace(' ', '-')
+    valid_filename = valid_filename.strip().strip('-').replace(' ', '-').replace("--", "-")
 
     return valid_filename
 
@@ -889,7 +927,10 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe
         os_part = ""
     else:
         os_part = LINK_OS[OS] + "/"
-    metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in paragraphs_metadata[title][SUBTITLE] if char.isalnum() or char == '-').strip('-')
+    if "index" not in paragraphs_metadata[title][MAIN_TITLE]:
+        metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in paragraphs_metadata[title][SUBTITLE] if char.isalnum() or char == '-').strip('-')
+    else:
+        metadata[REFERENCE_LINK] = DOCS_URL
 
     # write metadata to file
     with open(os.path.join(filepath, file_title + METADATA_EXTENSION + ".json"), 'w') as writefile:
@@ -964,7 +1005,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
                 writefile.write(jinja_text)
 
             # split in right way
-            _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS)
+            _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, is_linux_tutorial, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS)
 
             # prepare variables to fix metadata
             total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:]
@@ -1110,7 +1151,7 @@ def main(options):
             print("\nSplitting the file for the first time (split in sufficiently small generic sections and large os-specific chunks)")
 
         # split the text in paragraphs
-        paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options)
+        paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options, is_linux_tutorial)
 
         if options[VERBOSE]:
             print("\nFurther splitting os-specific chunks and writing generic and os-specific sections to files with metadata")
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json
index cdba091d7df..738d24cb42e 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json
@@ -5,7 +5,7 @@
     "title_depth": 2,
     "directory": "account",
     "links": {
-        "0": "../sites/hpc_policies"
+        "0": "https://docs.hpc.ugent.be/sites/hpc_policies"
     },
     "parent_title": "",
     "previous_title": null,
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json
index ff9c22397d1..74ea0125d71 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json
@@ -5,7 +5,7 @@
     "title_depth": 2,
     "directory": "connecting",
     "links": {
-        "0": "https://docs.hpc.ugent.be/connecting/../linux-tutorial/uploading_files/#copying-faster-with-rsync"
+        "0": "https://docs.hpc.ugent.be/linux-tutorial/uploading_files/#copying-faster-with-rsync"
     },
     "parent_title": "",
     "previous_title": "connecting_paragraph_14",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json
index e30467d0799..8d6b1696e08 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json
@@ -6,7 +6,7 @@
     "directory": "connecting",
     "links": {
         "0": "https://docs.hpc.ugent.be/web_portal",
-        "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node"
+        "1": "https://docs.hpc.ugent.be/troubleshooting/#issues-connecting-to-login-node"
     },
     "parent_title": "",
     "previous_title": "connecting_paragraph_2",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json
index 074e7e891ce..38f265cfdcd 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json
@@ -5,7 +5,7 @@
     "title_depth": 2,
     "directory": "connecting",
     "links": {
-        "0": "../useful_linux_commands"
+        "0": "https://docs.hpc.ugent.be/useful_linux_commands"
     },
     "parent_title": "",
     "previous_title": "connecting_paragraph_7",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json
index bcc0552177d..bc51f39d286 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json
@@ -6,7 +6,7 @@
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
     "links": {
-        "0": "../../linux-tutorial"
+        "0": "https://docs.hpc.ugent.be/linux-tutorial"
     },
     "previous_title": "account_paragraph_3",
     "next_title": "account_paragraph_5",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json
index 4214d6cb321..2b3633d71e7 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json
@@ -6,7 +6,7 @@
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
     "links": {
-        "0": "../connecting"
+        "0": "https://docs.hpc.ugent.be/connecting"
     },
     "previous_title": "account_paragraph_6",
     "next_title": "account_linux_paragraph_7.2",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json
index 988c10028d8..6b70790e1e3 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json
@@ -6,7 +6,7 @@
     "directory": "connecting",
     "parent_title": "Transfer-Files-tofrom-the-HPC",
     "links": {
-        "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch"
+        "0": "https://docs.hpc.ugent.be/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch"
     },
     "previous_title": "connecting_paragraph_12",
     "next_title": "connecting_linux_paragraph_13.2",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json
index 1fc868ffab2..c7fe6bf6a44 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json
@@ -6,8 +6,8 @@
     "directory": "connecting",
     "parent_title": "Transfer-Files-tofrom-the-HPC",
     "links": {
-        "0": "",
-        "1": ""
+        "0": "https://docs.hpc.ugent.be/",
+        "1": "https://docs.hpc.ugent.be/"
     },
     "previous_title": "connecting_linux_paragraph_13.5",
     "next_title": "connecting_linux_paragraph_13.7",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json
index 55613bca732..66c5dc4aeff 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json
@@ -6,7 +6,7 @@
     "directory": "connecting",
     "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
     "links": {
-        "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host"
+        "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host"
     },
     "previous_title": "connecting_paragraph_4",
     "next_title": "connecting_linux_paragraph_5.2",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json
index 5400014a85c..e3813cb647e 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json
@@ -6,7 +6,7 @@
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
     "links": {
-        "0": "../../linux-tutorial"
+        "0": "https://docs.hpc.ugent.be/linux-tutorial"
     },
     "previous_title": "account_paragraph_3",
     "next_title": "account_paragraph_5",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json
index f9b6c751fd4..18b3b3675de 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json
@@ -6,7 +6,7 @@
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
     "links": {
-        "0": "../connecting"
+        "0": "https://docs.hpc.ugent.be/connecting"
     },
     "previous_title": "account_paragraph_6",
     "next_title": "account_macos_paragraph_7.2",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
index 7dc9b50fbdd..dc5a8cb22b9 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
@@ -6,7 +6,7 @@
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
     "links": {
-        "0": "../../linux-tutorial"
+        "0": "https://docs.hpc.ugent.be/linux-tutorial"
     },
     "previous_title": "account_paragraph_3",
     "next_title": "account_windows_paragraph_4.2",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json
index 773acaabf23..534ebda0a1c 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json
@@ -6,8 +6,8 @@
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
     "links": {
-        "0": "https://docs.hpc.ugent.be/account/../connecting/#open-a-terminal",
-        "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair"
+        "0": "https://docs.hpc.ugent.be/connecting/#open-a-terminal",
+        "1": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair"
     },
     "previous_title": "account_windows_paragraph_4.1",
     "next_title": "account_windows_paragraph_4.3",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json
index d803aeadb25..4555638639d 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json
@@ -6,7 +6,7 @@
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
     "links": {
-        "0": ""
+        "0": "https://docs.hpc.ugent.be/"
     },
     "previous_title": "account_windows_paragraph_4.2",
     "next_title": "account_windows_paragraph_4.4",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json
index 46808447a10..11c69338029 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json
@@ -6,8 +6,8 @@
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
     "links": {
-        "0": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair",
-        "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair"
+        "0": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair",
+        "1": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair"
     },
     "previous_title": "account_windows_paragraph_6.1",
     "next_title": "account_windows_paragraph_6.3",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json
index 8b6b6f698d1..d3b7d581c94 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json
@@ -6,7 +6,7 @@
     "directory": "connecting",
     "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
     "links": {
-        "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host"
+        "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host"
     },
     "previous_title": "connecting_paragraph_3",
     "next_title": "connecting_paragraph_5",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_links.py b/scripts/HPC_chatbot_preprocessor/tests/test_links.py
new file mode 100644
index 00000000000..d1acca1d740
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_links.py
@@ -0,0 +1,69 @@
+import os
+import pytest
+from urllib import request
+from chatbot_parser import main
+import json
+
+whitelist = ["mailto:hpc@ugent.be"]
+slow_list = ["https://login.hpc.ugent.be", "https://www.edx.org/course/introduction-linux-linuxfoundationx-lfs101x-0"]
+
+options_general = {"SOURCE_DIRECTORY": "../../mkdocs/docs/HPC",
+                   "DESTINATION_DIRECTORY": ".",
+                   "SPLIT_ON_TITLES": False,
+                   "SPLIT_ON_PARAGRAPHS": True,
+                   "MIN_PARAGRAPH_LENGTH": 683,
+                   "MAX_TITLE_DEPTH": 4,
+                   "INCLUDE_LINKS_IN_PLAINTEXT": False,
+                   "DEEP_DIRECTORIES": False,
+                   "VERBOSE": False}
+options_os_specific = {"SOURCE_DIRECTORY": "../../mkdocs/docs/HPC/linux-tutorial",
+                       "DESTINATION_DIRECTORY": "./linux-tutorial",
+                       "SPLIT_ON_TITLES": False,
+                       "SPLIT_ON_PARAGRAPHS": True,
+                       "MIN_PARAGRAPH_LENGTH": 683,
+                       "MAX_TITLE_DEPTH": 4,
+                       "INCLUDE_LINKS_IN_PLAINTEXT": False,
+                       "DEEP_DIRECTORIES": False,
+                       "VERBOSE": False}
+
+
+@pytest.mark.parametrize("options", [options_general, options_os_specific])
+def test_all_links(options):
+    all_links = {}
+    main(options)
+    broken_links = {}
+    empty_links = {}
+
+    for (dirpath, dirnames, filenames) in os.walk(os.path.join(options['DESTINATION_DIRECTORY'], 'parsed_mds')):
+        for filename in filenames:
+            all_links[filename] = []
+            if filename.endswith('metadata.json'):
+                data = json.load(open(os.path.join(dirpath, filename)))
+                if 'links' in data.keys():
+                    for key in data['links'].keys():
+                        all_links[filename].append(data['links'][key])
+                all_links[filename].append(data['reference_link'].split("#")[0])
+
+    for filename in all_links.keys():
+        all_links[filename] = list(set(all_links[filename]))
+        for link in all_links[filename]:
+            if len(link) != 0:
+                try:
+                    if link not in whitelist and link not in slow_list:
+                        with request.urlopen(link) as res:
+                            if res.status == 200:
+                                pass
+                except:
+                    print("Broken link in " + filename + ": " + link)
+                    if filename in broken_links.keys():
+                        broken_links[filename].append(link)
+                    else:
+                        broken_links[filename] = [link]
+            else:
+                print("Empty link in " + filename)
+                if filename in empty_links.keys():
+                    empty_links[filename].append(link)
+                else:
+                    empty_links[filename] = [link]
+    assert len(empty_links.keys()) == 0
+    assert len(broken_links.keys()) == 0

From b6b861044b0b12f06ba9b59ac7406feef07761e8 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 29 Aug 2024 16:44:15 +0200
Subject: [PATCH 137/152] new test to make sure lists are kept as one section

---
 .../chatbot_parser.py                         |  2 ++
 .../generic/account/account_paragraph_1.txt   |  2 ++
 .../account/account_paragraph_1_metadata.json |  3 ++-
 .../generic/account/account_paragraph_2.txt   |  7 +++--
 .../account/account_paragraph_2_metadata.json |  5 ++--
 .../generic/account/account_paragraph_3.txt   |  8 +++---
 .../account/account_paragraph_3_metadata.json |  3 +++
 .../account/account_linux_paragraph_4.1.txt   |  4 ---
 .../account_linux_paragraph_4.1_metadata.json | 15 -----------
 .../account/account_macos_paragraph_4.1.txt   |  4 ---
 .../account_macos_paragraph_4.1_metadata.json | 15 -----------
 .../account/account_macos_paragraph_5.1.txt   |  3 +++
 .../account/account_macos_paragraph_5.2.txt   |  7 ++---
 .../account/account_macos_paragraph_5.3.txt   |  4 ---
 ...necting_macos_paragraph_13.1_metadata.json |  2 +-
 ...necting_macos_paragraph_13.6_metadata.json |  4 +--
 ...nnecting_macos_paragraph_5.1_metadata.json |  2 +-
 .../account/account_windows_paragraph_4.1.txt |  9 ++++---
 ...ccount_windows_paragraph_4.1_metadata.json |  3 ---
 .../account/account_windows_paragraph_4.2.txt | 13 +++++-----
 .../account/account_windows_paragraph_4.3.txt |  7 -----
 .../tests/test_files/list_file/list_test.md   | 15 +++++++++++
 .../tests/test_lists.py                       | 26 +++++++++++++++++++
 23 files changed, 84 insertions(+), 79 deletions(-)
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_lists.py

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 9aa7dc972e5..f5e5b452ff5 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -503,6 +503,8 @@ def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_pa
                 pass
             elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list:  # line(s) between list entries
                 pass
+            elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', nxt):
+                in_list = True
             elif in_list:
                 if options[VERBOSE]:
                     print("List ended, starting new paragraphs again")
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt
index 1b79fd22391..c3f86ade180 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt
@@ -11,3 +11,5 @@ Brussels University Association, Antwerp University Association and the
 University Colleges-Limburg. The VSC is funded by the Flemish
 Government.
 There are two methods for connecting to HPC-UGent infrastructure:
+- Using a terminal to connect via SSH.
+- Using the web portal
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json
index 738d24cb42e..52a3ef55568 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json
@@ -5,7 +5,8 @@
     "title_depth": 2,
     "directory": "account",
     "links": {
-        "0": "https://docs.hpc.ugent.be/sites/hpc_policies"
+        "0": "https://docs.hpc.ugent.be/sites/hpc_policies",
+        "1": "https://docs.hpc.ugent.be/web_portal"
     },
     "parent_title": "",
     "previous_title": null,
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt
index 6ecd65e2184..9614ed1447c 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt
@@ -1,6 +1,9 @@
-- Using a terminal to connect via SSH.
-- Using the web portal
 The web portal offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required).
 If you would like use a terminal with SSH as this gives you more flexibility continue reading.
 However if you prefer to use the web portal, you can skip ahead to the following section: Applying for the account.
 Once you have successfully obtained an account, you can then delve into the details of utilizing the HPC-UGent web portal by reading Using the HPC-UGent web portal.
+The HPC-UGent infrastructure clusters use public/private key pairs for user authentication
+(rather than passwords). Technically, the private key is stored on your
+local computer and always stays there; the public key is stored on the HPC.
+Access to the HPC is granted to anyone who can prove to have access to the
+corresponding private key on his local computer.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json
index 0b22e2986a0..a41a1993674 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json
@@ -5,9 +5,8 @@
     "title_depth": 2,
     "directory": "account",
     "links": {
-        "0": "https://docs.hpc.ugent.be/web_portal",
-        "1": "https://docs.hpc.ugent.be/account/#applying-for-the-account",
-        "2": "https://docs.hpc.ugent.be/web_portal"
+        "0": "https://docs.hpc.ugent.be/account/#applying-for-the-account",
+        "1": "https://docs.hpc.ugent.be/web_portal"
     },
     "parent_title": "",
     "previous_title": "account_paragraph_1",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt
index e4946869273..963b35c090b 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt
@@ -1,8 +1,3 @@
-The HPC-UGent infrastructure clusters use public/private key pairs for user authentication
-(rather than passwords). Technically, the private key is stored on your
-local computer and always stays there; the public key is stored on the HPC.
-Access to the HPC is granted to anyone who can prove to have access to the
-corresponding private key on his local computer.
 How do SSH keys work?
 -   an SSH public/private key pair can be seen as a lock and a key
 -   the SSH public key is equivalent with a lock: you give it to the
@@ -15,3 +10,6 @@ How do SSH keys work?
     locks (SSH public keys) attached to it, and you only need to open
     one lock with the corresponding key (SSH private key) to open
     the door (log in to the account).
+Since all VSC clusters use Linux as their main operating system, you
+will need to get acquainted with using the command-line interface and
+using the terminal (see tutorial).
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json
index bd2f73195a6..4df622cc4aa 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json
@@ -4,6 +4,9 @@
     "source_file": "../../mkdocs/docs/HPC/account.md",
     "title_depth": 3,
     "directory": "account",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/linux-tutorial"
+    },
     "parent_title": "",
     "previous_title": "account_paragraph_2",
     "next_title": "account_paragraph_4",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt
deleted file mode 100644
index 3a282a73a15..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-How do SSH keys work
-Since all VSC clusters use Linux as their main operating system, you
-will need to get acquainted with using the command-line interface and
-using the terminal (see tutorial).
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json
deleted file mode 100644
index bc51f39d286..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "How-do-SSH-keys-work",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/linux-tutorial"
-    },
-    "previous_title": "account_paragraph_3",
-    "next_title": "account_paragraph_5",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#how-do-ssh-keys-work"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt
deleted file mode 100644
index 3a282a73a15..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-How do SSH keys work
-Since all VSC clusters use Linux as their main operating system, you
-will need to get acquainted with using the command-line interface and
-using the terminal (see tutorial).
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json
deleted file mode 100644
index e3813cb647e..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "How-do-SSH-keys-work",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/linux-tutorial"
-    },
-    "previous_title": "account_paragraph_3",
-    "next_title": "account_paragraph_5",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#how-do-ssh-keys-work"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt
index f3483fcaef1..d96c80b42a2 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt
@@ -10,3 +10,6 @@ other secure network services between two networked computers. In short,
 ssh provides a secure connection between 2 computers via insecure
 channels (Network, Internet, telephone lines, ...).
 "Secure" means that:
+1.  the User is authenticated to the System; and
+2.  the System is authenticated to the User; and
+3.  all data is encrypted during transfer.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt
index 5189a953002..318f913fba3 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt
@@ -1,6 +1,3 @@
-1.  the User is authenticated to the System; and
-2.  the System is authenticated to the User; and
-3.  all data is encrypted during transfer.
 OpenSSH is a FREE implementation of the SSH connectivity protocol.  comes
 with its own implementation of OpenSSH, so you don't need to install any
 third-party software to use it. Just open a terminal window and jump in!
@@ -11,3 +8,7 @@ $ ssh -V
 OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017
 To access the clusters and transfer your files, you will use the
 following commands:
+1.  ssh-keygen: to generate the SSH key pair (public + private key);
+2.  ssh: to open a shell on a remote machine;
+3.  sftp: a secure equivalent of ftp;
+4.  scp: a secure equivalent of the remote copy command rcp.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt
index a8c087f818b..5df90a3dd7c 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt
@@ -1,7 +1,3 @@
-1.  ssh-keygen: to generate the SSH key pair (public + private key);
-2.  ssh: to open a shell on a remote machine;
-3.  sftp: a secure equivalent of ftp;
-4.  scp: a secure equivalent of the remote copy command rcp.
 Generate a public/private key pair with OpenSSH
 A key pair might already be present in the default location inside your
 home directory. Therefore, we first check if a key is available with the
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json
index 9ec843ff0aa..79157005600 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json
@@ -6,7 +6,7 @@
     "directory": "connecting",
     "parent_title": "Transfer-Files-tofrom-the-HPC",
     "links": {
-        "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch"
+        "0": "https://docs.hpc.ugent.be/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch"
     },
     "previous_title": "connecting_paragraph_12",
     "next_title": "connecting_macos_paragraph_13.2",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json
index fe899ad9dbc..9b08fbde549 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json
@@ -6,8 +6,8 @@
     "directory": "connecting",
     "parent_title": "Transfer-Files-tofrom-the-HPC",
     "links": {
-        "0": "",
-        "1": ""
+        "0": "https://docs.hpc.ugent.be/",
+        "1": "https://docs.hpc.ugent.be/"
     },
     "previous_title": "connecting_macos_paragraph_13.5",
     "next_title": "connecting_macos_paragraph_13.7",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json
index 85b088b0e8c..f928fbfcdd6 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json
@@ -6,7 +6,7 @@
     "directory": "connecting",
     "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
     "links": {
-        "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host"
+        "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host"
     },
     "previous_title": "connecting_paragraph_4",
     "next_title": "connecting_macos_paragraph_5.2",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt
index 1e70493305f..93ca7ac9da5 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt
@@ -1,7 +1,4 @@
 How do SSH keys work
-Since all VSC clusters use Linux as their main operating system, you
-will need to get acquainted with using the command-line interface and
-using the terminal (see tutorial).
 A typical Windows environment does not come with pre-installed software
 to connect and run command-line executables on a HPC. Some tools need to be
 installed on your Windows machine first, before we can start the actual
@@ -13,3 +10,9 @@ PuTTYgen executable and run it. This can be useful in situations where
 you do not have the required permissions to install software on the
 computer you are using. Alternatively, an installation package is also
 available.
+You can download PuTTY from the official address:
+<https://www.chiark.greenend.org.uk/~sgtatham/putty/latest.html>. You
+probably want the 64-bits version. If you can install software on your
+computer, you can use the "Package files", if not, you can download and
+use putty.exe and puttygen.exe in the "Alternative binary files"
+section.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
index dc5a8cb22b9..e0024f40d55 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
@@ -5,9 +5,6 @@
     "title_depth": 3,
     "directory": "account",
     "parent_title": "Getting-ready-to-request-an-account",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/linux-tutorial"
-    },
     "previous_title": "account_paragraph_3",
     "next_title": "account_windows_paragraph_4.2",
     "OS": "windows",
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt
index 1a30a219fec..cebd1da3baf 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt
@@ -1,12 +1,13 @@
-You can download PuTTY from the official address:
-<https://www.chiark.greenend.org.uk/~sgtatham/putty/latest.html>. You
-probably want the 64-bits version. If you can install software on your
-computer, you can use the "Package files", if not, you can download and
-use putty.exe and puttygen.exe in the "Alternative binary files"
-section.
 The PuTTY package consists of several components, but we'll only use
 two:
 1. PuTTY: the Telnet and SSH client itself (to login, see Open a terminal)
 2.  PuTTYgen: an RSA and DSA key generation utility (to generate a key pair,
     see Generate a public/private key pair)
 Generating a public/private key pair
+Before requesting a VSC account, you need to generate a pair of ssh
+keys. You need 2 keys, a public and a private key. You can visualise the
+public key as a lock to which only you have the key (your private key).
+You can send a copy of your lock to anyone without any problems, because
+only you can open it, as long as you keep your private key secure. To
+generate a public/private key pair, you can use the PuTTYgen key
+generator.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt
index b082d381a64..6e65300562d 100644
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt
+++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt
@@ -1,10 +1,3 @@
-Before requesting a VSC account, you need to generate a pair of ssh
-keys. You need 2 keys, a public and a private key. You can visualise the
-public key as a lock to which only you have the key (your private key).
-You can send a copy of your lock to anyone without any problems, because
-only you can open it, as long as you keep your private key secure. To
-generate a public/private key pair, you can use the PuTTYgen key
-generator.
 Start PuTTYgen.exe it and follow these steps:
 1.  In "Parameters" (at the bottom of the window), choose "RSA" and set the number of
     bits in the key to 4096.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md
new file mode 100644
index 00000000000..1d1d3c210e8
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md
@@ -0,0 +1,15 @@
+# Title
+
+Some explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.
+
+1. First entry
+
+2. Second entry
+
+3. Third entry
+
+    ![image](img/an_image_for_the_third_entry.png)
+
+4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.
+
+And now the text continues like normal in a new section.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py
new file mode 100644
index 00000000000..4975856a75f
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py
@@ -0,0 +1,26 @@
+import pytest
+from chatbot_parser import split_on_paragraphs
+
+
+@pytest.mark.parametrize("file, main_title, options, is_linux_tutorial, expected_text", [
+    ("./test_files/list_file/list_test.md",
+     "list_test.md",
+     {
+      "SOURCE_DIRECTORY": "./test_files/list_file",
+      "DESTINATION_DIRECTORY": "./test_files/list_file",
+      "SPLIT_ON_TITLES": False,
+      "SPLIT_ON_PARAGRAPHS": True,
+      "MIN_PARAGRAPH_LENGTH": 100,
+      "MAX_TITLE_DEPTH": 4,
+      "INCLUDE_LINKS_IN_PLAINTEXT": False,
+      "DEEP_DIRECTORIES": False,
+      "VERBOSE": False
+     },
+     False,
+     {
+         'list_test.md_paragraph_1': "Title\nSome explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.\n1. First entry\n2. Second entry\n3. Third entry\n4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.\n",
+         'list_test.md_paragraph_2': 'And now the text continues like normal in a new section.'}
+     )
+])
+def test_links(file, main_title, options, is_linux_tutorial, expected_text):
+    assert split_on_paragraphs(file, main_title, options, is_linux_tutorial)[1] == expected_text

From 57a21397a869cbcffb6fab5f4d14496043b9b174 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Thu, 29 Aug 2024 16:49:24 +0200
Subject: [PATCH 138/152] updated test_file for list test

---
 .../tests/test_files/list_file/list_test.md   |  2 +-
 .../tests/test_lists.py                       | 23 ++++++++++---------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md
index 1d1d3c210e8..1e18a1495d5 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md
@@ -2,7 +2,7 @@
 
 Some explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.
 
-1. First entry
+1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list.
 
 2. Second entry
 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py
index 4975856a75f..06e56a5cb2c 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py
@@ -6,20 +6,21 @@
     ("./test_files/list_file/list_test.md",
      "list_test.md",
      {
-      "SOURCE_DIRECTORY": "./test_files/list_file",
-      "DESTINATION_DIRECTORY": "./test_files/list_file",
-      "SPLIT_ON_TITLES": False,
-      "SPLIT_ON_PARAGRAPHS": True,
-      "MIN_PARAGRAPH_LENGTH": 100,
-      "MAX_TITLE_DEPTH": 4,
-      "INCLUDE_LINKS_IN_PLAINTEXT": False,
-      "DEEP_DIRECTORIES": False,
-      "VERBOSE": False
+         "SOURCE_DIRECTORY": "./test_files/list_file",
+         "DESTINATION_DIRECTORY": "./test_files/list_file",
+         "SPLIT_ON_TITLES": False,
+         "SPLIT_ON_PARAGRAPHS": True,
+         "MIN_PARAGRAPH_LENGTH": 100,
+         "MAX_TITLE_DEPTH": 4,
+         "INCLUDE_LINKS_IN_PLAINTEXT": False,
+         "DEEP_DIRECTORIES": False,
+         "VERBOSE": False
      },
      False,
      {
-         'list_test.md_paragraph_1': "Title\nSome explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.\n1. First entry\n2. Second entry\n3. Third entry\n4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.\n",
-         'list_test.md_paragraph_2': 'And now the text continues like normal in a new section.'}
+         'list_test.md_paragraph_1': "Title\nSome explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.\n1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list.\n2. Second entry\n3. Third entry\n4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.\n",
+         'list_test.md_paragraph_2': 'And now the text continues like normal in a new section.'
+     }
      )
 ])
 def test_links(file, main_title, options, is_linux_tutorial, expected_text):

From 170a10cb9eaa0d92482daef766dd2b0918e9a4cd Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 30 Aug 2024 09:53:12 +0200
Subject: [PATCH 139/152] dropped <> around links and started new function to
 calculate length of paragraphs

---
 .../HPC_chatbot_preprocessor/chatbot_parser.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index f5e5b452ff5..cff487f8589 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -235,6 +235,10 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title, is_
             elif re.fullmatch(r'!--.*?--', content):
                 curr_line = re.sub(r'<.*?>', "", curr_line)
 
+            # drop the <> around links
+            elif re.match(r'http://', content) or re.match(r'https://', content):
+                curr_line = re.sub(r'<' + content + '>', content, curr_line )
+
             # keep the rest
             else:
                 pass
@@ -527,7 +531,7 @@ def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_pa
                 title_level = check_for_title(line, in_code_block, curr_dirs, options)
 
                 # check whether a new paragraph should be started
-                if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= options[MIN_PARAGRAPH_LENGTH] and not in_code_block and not in_list:
+                if line == "\n" and paragraph_long_enough(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph), options) and not in_code_block and not in_list:
 
                     # create a title for the previous paragraph
                     if current_paragraph_number == -1:
@@ -602,6 +606,18 @@ def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_pa
     return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order
 
 
+def paragraph_long_enough(paragraph, options):
+    """
+    Function that checks if the paragraph is long enough to be split off
+
+    :param paragraph: current paragraph
+    :param options: dictionary containing the options given by the user
+    :return: True if len(paragraph) is at least options[MIN_PARAGRAPH_LENGTH], False otherwise
+    """
+    # TODO: change this into something that uses the tokenizer
+    return len(paragraph) >= options[MIN_PARAGRAPH_LENGTH]
+
+
 def write_metadata(main_title, subtitle, links, title_level, directory, source_file):
     """
     Function that writes metadata about a text section to a dictionary

From 04efff6ca40a3b19f694e8168d83a77d45a1078b Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 30 Aug 2024 10:10:49 +0200
Subject: [PATCH 140/152] removed parsed mds

---
 .../generic/account/account_paragraph_1.txt   | 15 -------
 .../generic/account/account_paragraph_10.txt  | 18 --------
 .../account_paragraph_10_metadata.json        | 12 ------
 .../generic/account/account_paragraph_12.txt  | 14 -------
 .../account_paragraph_12_metadata.json        | 12 ------
 .../account/account_paragraph_1_metadata.json | 16 -------
 .../generic/account/account_paragraph_2.txt   |  9 ----
 .../account/account_paragraph_2_metadata.json | 16 -------
 .../generic/account/account_paragraph_3.txt   | 15 -------
 .../account/account_paragraph_3_metadata.json | 15 -------
 .../generic/account/account_paragraph_8.txt   | 14 -------
 .../account/account_paragraph_8_metadata.json | 12 ------
 .../compiling_your_software_paragraph_1.txt   | 10 -----
 .../compiling_your_software_paragraph_10.txt  | 19 ---------
 ...g_your_software_paragraph_10_metadata.json | 11 -----
 .../compiling_your_software_paragraph_11.txt  | 20 ---------
 ...g_your_software_paragraph_11_metadata.json | 11 -----
 .../compiling_your_software_paragraph_12.txt  |  9 ----
 ...g_your_software_paragraph_12_metadata.json | 11 -----
 ...ng_your_software_paragraph_1_metadata.json | 11 -----
 .../compiling_your_software_paragraph_2.txt   | 13 ------
 ...ng_your_software_paragraph_2_metadata.json | 11 -----
 .../compiling_your_software_paragraph_3.txt   | 13 ------
 ...ng_your_software_paragraph_3_metadata.json | 11 -----
 .../compiling_your_software_paragraph_4.txt   | 15 -------
 ...ng_your_software_paragraph_4_metadata.json | 11 -----
 .../compiling_your_software_paragraph_5.txt   | 16 -------
 ...ng_your_software_paragraph_5_metadata.json | 11 -----
 .../compiling_your_software_paragraph_6.txt   | 30 -------------
 ...ng_your_software_paragraph_6_metadata.json | 14 -------
 .../compiling_your_software_paragraph_7.txt   | 15 -------
 ...ng_your_software_paragraph_7_metadata.json | 11 -----
 .../compiling_your_software_paragraph_8.txt   | 19 ---------
 ...ng_your_software_paragraph_8_metadata.json | 11 -----
 .../compiling_your_software_paragraph_9.txt   | 32 --------------
 ...ng_your_software_paragraph_9_metadata.json | 11 -----
 .../connecting/connecting_paragraph_1.txt     | 18 --------
 .../connecting/connecting_paragraph_10.txt    | 24 -----------
 .../connecting_paragraph_10_metadata.json     | 12 ------
 .../connecting/connecting_paragraph_15.txt    |  7 ----
 .../connecting_paragraph_15_metadata.json     | 15 -------
 .../connecting/connecting_paragraph_16.txt    | 11 -----
 .../connecting_paragraph_16_metadata.json     | 16 -------
 .../connecting_paragraph_1_metadata.json      | 15 -------
 .../connecting/connecting_paragraph_2.txt     | 18 --------
 .../connecting_paragraph_2_metadata.json      | 12 ------
 .../connecting/connecting_paragraph_3.txt     |  9 ----
 .../connecting_paragraph_3_metadata.json      | 16 -------
 .../connecting/connecting_paragraph_6.txt     | 14 -------
 .../connecting_paragraph_6_metadata.json      | 12 ------
 .../connecting/connecting_paragraph_7.txt     | 21 ----------
 .../connecting_paragraph_7_metadata.json      | 12 ------
 .../connecting/connecting_paragraph_8.txt     | 12 ------
 .../connecting_paragraph_8_metadata.json      | 15 -------
 .../connecting/connecting_paragraph_9.txt     | 19 ---------
 .../connecting_paragraph_9_metadata.json      | 12 ------
 .../account/account_linux_paragraph_11.1.txt  | 17 --------
 ...account_linux_paragraph_11.1_metadata.json | 15 -------
 .../account/account_linux_paragraph_5.1.txt   | 14 -------
 .../account_linux_paragraph_5.1_metadata.json | 12 ------
 .../account/account_linux_paragraph_5.2.txt   | 14 -------
 .../account_linux_paragraph_5.2_metadata.json | 12 ------
 .../account/account_linux_paragraph_5.3.txt   | 16 -------
 .../account_linux_paragraph_5.3_metadata.json | 12 ------
 .../account/account_linux_paragraph_5.4.txt   | 13 ------
 .../account_linux_paragraph_5.4_metadata.json | 12 ------
 .../account/account_linux_paragraph_5.5.txt   |  6 ---
 .../account_linux_paragraph_5.5_metadata.json | 12 ------
 .../account/account_linux_paragraph_6.1.txt   |  1 -
 .../account_linux_paragraph_6.1_metadata.json | 12 ------
 .../account/account_linux_paragraph_7.1.txt   | 14 -------
 .../account_linux_paragraph_7.1_metadata.json | 15 -------
 .../account/account_linux_paragraph_7.2.txt   |  8 ----
 .../account_linux_paragraph_7.2_metadata.json | 12 ------
 .../account/account_linux_paragraph_9.1.txt   |  7 ----
 .../account_linux_paragraph_9.1_metadata.json | 12 ------
 .../connecting_linux_paragraph_11.1.txt       | 37 ----------------
 ...necting_linux_paragraph_11.1_metadata.json | 12 ------
 .../connecting_linux_paragraph_12.1.txt       |  6 ---
 ...necting_linux_paragraph_12.1_metadata.json | 12 ------
 .../connecting_linux_paragraph_13.1.txt       | 12 ------
 ...necting_linux_paragraph_13.1_metadata.json | 15 -------
 .../connecting_linux_paragraph_13.2.txt       | 17 --------
 ...necting_linux_paragraph_13.2_metadata.json | 12 ------
 .../connecting_linux_paragraph_13.3.txt       | 22 ----------
 ...necting_linux_paragraph_13.3_metadata.json | 12 ------
 .../connecting_linux_paragraph_13.4.txt       | 14 -------
 ...necting_linux_paragraph_13.4_metadata.json | 12 ------
 .../connecting_linux_paragraph_13.5.txt       | 14 -------
 ...necting_linux_paragraph_13.5_metadata.json | 12 ------
 .../connecting_linux_paragraph_13.6.txt       | 18 --------
 ...necting_linux_paragraph_13.6_metadata.json | 16 -------
 .../connecting_linux_paragraph_14.1.txt       | 10 -----
 ...necting_linux_paragraph_14.1_metadata.json | 12 ------
 .../connecting_linux_paragraph_5.1.txt        | 12 ------
 ...nnecting_linux_paragraph_5.1_metadata.json | 15 -------
 .../connecting_linux_paragraph_5.2.txt        |  4 --
 ...nnecting_linux_paragraph_5.2_metadata.json | 12 ------
 .../account/account_macos_paragraph_11.1.txt  | 17 --------
 ...account_macos_paragraph_11.1_metadata.json | 15 -------
 .../account/account_macos_paragraph_5.1.txt   | 15 -------
 .../account_macos_paragraph_5.1_metadata.json | 12 ------
 .../account/account_macos_paragraph_5.2.txt   | 14 -------
 .../account_macos_paragraph_5.2_metadata.json | 12 ------
 .../account/account_macos_paragraph_5.3.txt   | 16 -------
 .../account_macos_paragraph_5.3_metadata.json | 12 ------
 .../account/account_macos_paragraph_5.4.txt   | 13 ------
 .../account_macos_paragraph_5.4_metadata.json | 12 ------
 .../account/account_macos_paragraph_5.5.txt   |  6 ---
 .../account_macos_paragraph_5.5_metadata.json | 12 ------
 .../account/account_macos_paragraph_6.1.txt   |  1 -
 .../account_macos_paragraph_6.1_metadata.json | 12 ------
 .../account/account_macos_paragraph_7.1.txt   | 14 -------
 .../account_macos_paragraph_7.1_metadata.json | 15 -------
 .../account/account_macos_paragraph_7.2.txt   |  7 ----
 .../account_macos_paragraph_7.2_metadata.json | 12 ------
 .../account/account_macos_paragraph_9.1.txt   | 12 ------
 .../account_macos_paragraph_9.1_metadata.json | 12 ------
 .../connecting_macos_paragraph_11.1.txt       | 37 ----------------
 ...necting_macos_paragraph_11.1_metadata.json | 12 ------
 .../connecting_macos_paragraph_12.1.txt       |  6 ---
 ...necting_macos_paragraph_12.1_metadata.json | 12 ------
 .../connecting_macos_paragraph_13.1.txt       | 12 ------
 ...necting_macos_paragraph_13.1_metadata.json | 15 -------
 .../connecting_macos_paragraph_13.2.txt       | 17 --------
 ...necting_macos_paragraph_13.2_metadata.json | 12 ------
 .../connecting_macos_paragraph_13.3.txt       | 22 ----------
 ...necting_macos_paragraph_13.3_metadata.json | 12 ------
 .../connecting_macos_paragraph_13.4.txt       | 14 -------
 ...necting_macos_paragraph_13.4_metadata.json | 12 ------
 .../connecting_macos_paragraph_13.5.txt       | 14 -------
 ...necting_macos_paragraph_13.5_metadata.json | 12 ------
 .../connecting_macos_paragraph_13.6.txt       | 18 --------
 ...necting_macos_paragraph_13.6_metadata.json | 16 -------
 .../connecting_macos_paragraph_14.1.txt       | 15 -------
 ...necting_macos_paragraph_14.1_metadata.json | 12 ------
 .../connecting_macos_paragraph_14.2.txt       |  3 --
 ...necting_macos_paragraph_14.2_metadata.json | 12 ------
 .../connecting_macos_paragraph_5.1.txt        | 10 -----
 ...nnecting_macos_paragraph_5.1_metadata.json | 15 -------
 .../connecting_macos_paragraph_5.2.txt        |  7 ----
 ...nnecting_macos_paragraph_5.2_metadata.json | 12 ------
 .../account_windows_paragraph_11.1.txt        | 17 --------
 ...count_windows_paragraph_11.1_metadata.json | 15 -------
 .../account/account_windows_paragraph_4.1.txt | 18 --------
 ...ccount_windows_paragraph_4.1_metadata.json | 12 ------
 .../account/account_windows_paragraph_4.2.txt | 13 ------
 ...ccount_windows_paragraph_4.2_metadata.json | 16 -------
 .../account/account_windows_paragraph_4.3.txt | 30 -------------
 ...ccount_windows_paragraph_4.3_metadata.json | 15 -------
 .../account/account_windows_paragraph_4.4.txt |  2 -
 ...ccount_windows_paragraph_4.4_metadata.json | 12 ------
 .../account/account_windows_paragraph_6.1.txt | 13 ------
 ...ccount_windows_paragraph_6.1_metadata.json | 12 ------
 .../account/account_windows_paragraph_6.2.txt | 11 -----
 ...ccount_windows_paragraph_6.2_metadata.json | 16 -------
 .../account/account_windows_paragraph_6.3.txt |  5 ---
 ...ccount_windows_paragraph_6.3_metadata.json | 12 ------
 .../account/account_windows_paragraph_9.1.txt |  8 ----
 ...ccount_windows_paragraph_9.1_metadata.json | 12 ------
 .../connecting_windows_paragraph_11.1.txt     |  9 ----
 ...cting_windows_paragraph_11.1_metadata.json | 12 ------
 .../connecting_windows_paragraph_12.1.txt     | 22 ----------
 ...cting_windows_paragraph_12.1_metadata.json | 12 ------
 .../connecting_windows_paragraph_12.2.txt     | 11 -----
 ...cting_windows_paragraph_12.2_metadata.json | 12 ------
 .../connecting_windows_paragraph_12.3.txt     |  6 ---
 ...cting_windows_paragraph_12.3_metadata.json | 12 ------
 .../connecting_windows_paragraph_4.1.txt      | 42 -------------------
 ...ecting_windows_paragraph_4.1_metadata.json | 15 -------
 170 files changed, 2310 deletions(-)
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json

diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt
deleted file mode 100644
index c3f86ade180..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Getting an HPC Account
-Getting ready to request an account
-All users of AUGent can request
-an
-account on the HPC, which is part of the Flemish Supercomputing Centre (VSC).
-See HPC policies for more information on who is entitled to an account.
-The VSC, abbreviation of Flemish Supercomputer Centre, is a virtual
-supercomputer centre. It is a partnership between the five Flemish
-associations: the Association KU Leuven, Ghent University Association,
-Brussels University Association, Antwerp University Association and the
-University Colleges-Limburg. The VSC is funded by the Flemish
-Government.
-There are two methods for connecting to HPC-UGent infrastructure:
-- Using a terminal to connect via SSH.
-- Using the web portal
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt
deleted file mode 100644
index 7b0a39279e4..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-After you have uploaded your public key you will receive an e-mail with
-a link to confirm your e-mail address. After confirming your e-mail
-address the VSC staff will review and if applicable approve your
-account.
-Welcome e-mail
-Within one day, you should receive a Welcome e-mail with your VSC
-account details.
-Dear (Username), 
-Your VSC-account has been approved by an administrator.
-Your vsc-username is vsc40000
-Your account should be fully active within one hour.
-To check or update your account information please visit
-https://account.vscentrum.be/
-For further info please visit https://www.vscentrum.be/user-portal
-Kind regards,
--- The VSC administrators
-Now, you can start using the HPC. You can always look up your VSC id later
-by visiting <https://account.vscentrum.be>.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json
deleted file mode 100644
index e417029c16f..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Welcome-e-mail",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "",
-    "previous_title": "account_paragraph_9",
-    "next_title": "account_paragraph_11",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/account/#welcome-e-mail"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt
deleted file mode 100644
index 7ecd78e5c9f..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Computation Workflow on the HPC
-A typical Computation workflow will be:
-1.  Connect to the HPC
-2.  Transfer your files to the HPC
-3.  Compile your code and test it
-4.  Create a job script
-5.  Submit your job
-6.  Wait while
-    1.  your job gets into the queue
-    2.  your job gets executed
-    3.  your job finishes
-7.  Move your results
-We'll take you through the different tasks one by one in the following
-chapters.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json
deleted file mode 100644
index e43e729aa74..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Computation-Workflow-on-the-HPC",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 2,
-    "directory": "account",
-    "parent_title": "",
-    "previous_title": "account_paragraph_11",
-    "next_title": null,
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/account/#computation-workflow-on-the-hpc"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json
deleted file mode 100644
index 52a3ef55568..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Getting-ready-to-request-an-account",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 2,
-    "directory": "account",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/sites/hpc_policies",
-        "1": "https://docs.hpc.ugent.be/web_portal"
-    },
-    "parent_title": "",
-    "previous_title": null,
-    "next_title": "account_paragraph_2",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/account/#getting-ready-to-request-an-account"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt
deleted file mode 100644
index 9614ed1447c..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-The web portal offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required).
-If you would like use a terminal with SSH as this gives you more flexibility continue reading.
-However if you prefer to use the web portal, you can skip ahead to the following section: Applying for the account.
-Once you have successfully obtained an account, you can then delve into the details of utilizing the HPC-UGent web portal by reading Using the HPC-UGent web portal.
-The HPC-UGent infrastructure clusters use public/private key pairs for user authentication
-(rather than passwords). Technically, the private key is stored on your
-local computer and always stays there; the public key is stored on the HPC.
-Access to the HPC is granted to anyone who can prove to have access to the
-corresponding private key on his local computer.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json
deleted file mode 100644
index a41a1993674..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Getting-ready-to-request-an-account",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 2,
-    "directory": "account",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/account/#applying-for-the-account",
-        "1": "https://docs.hpc.ugent.be/web_portal"
-    },
-    "parent_title": "",
-    "previous_title": "account_paragraph_1",
-    "next_title": "account_paragraph_3",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/account/#getting-ready-to-request-an-account"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt
deleted file mode 100644
index 963b35c090b..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-How do SSH keys work?
--   an SSH public/private key pair can be seen as a lock and a key
--   the SSH public key is equivalent with a lock: you give it to the
-    VSC and they put it on the door that gives access to your account.
--   the SSH private key is like a physical key: you don't hand it out
-    to other people.
--   anyone who has the key (and the optional password) can unlock the
-    door and log in to the account.
--   the door to your VSC account is special: it can have multiple
-    locks (SSH public keys) attached to it, and you only need to open
-    one lock with the corresponding key (SSH private key) to open
-    the door (log in to the account).
-Since all VSC clusters use Linux as their main operating system, you
-will need to get acquainted with using the command-line interface and
-using the terminal (see tutorial).
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json
deleted file mode 100644
index 4df622cc4aa..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "How-do-SSH-keys-work",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/linux-tutorial"
-    },
-    "parent_title": "",
-    "previous_title": "account_paragraph_2",
-    "next_title": "account_paragraph_4",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/account/#how-do-ssh-keys-work"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt
deleted file mode 100644
index 6c5695dfff3..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Applying for the account
-Visit <https://account.vscentrum.be/>
-You will be redirected to our WAYF (Where Are You From) service where
-you have to select your "Home Organisation".
-Select "UGent" in the dropdown box and optionally select "Save my preference"
-and "permanently".
-Click "Confirm"
-You will now be taken to the authentication page of your institute.
-You will now have to log in with CAS using your UGent account.
-You either have a login name of maximum 8 characters, or a (non-UGent)
-email address if you are an external user. In case of problems with your
-UGent password, please visit: <https://password.ugent.be/>. After
-logging in, you may be requested to share your information. Click "Yes,
-continue".
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json
deleted file mode 100644
index 6a77c48dbd1..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Applying-for-the-account",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 2,
-    "directory": "account",
-    "parent_title": "",
-    "previous_title": "account_paragraph_7",
-    "next_title": "account_paragraph_9",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/account/#applying-for-the-account"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt
deleted file mode 100644
index db1afd43e68..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-Compiling and testing your software on the HPC
-All nodes in the HPC cluster are running the "RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty)" 
-Operating system, which is a specific version of Red Hat Enterprise Linux. This means that all the 
-software programs
-(executable) that the end-user wants to run on the HPC first must be
-compiled for RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty). It also means that you first have to install all the
-required external software packages on the HPC.
-Most commonly used compilers are already pre-installed on the HPC and can be
-used straight away. Also, many popular external software packages, which
-are regularly used in the scientific community, are also pre-installed.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt
deleted file mode 100644
index d49ba76b01a..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-The "mpi_hello.c" program is a simple source file, written in C with MPI
-library calls.
-Then, check the command line options for *"mpicc" (GNU C-Compiler with
-MPI extensions)*, then we compile and list the contents of the directory
-again:
-mpicc --help
-mpicc -o mpihello mpihello.c
-ls -l
-A new file "hello" has been created. Note that this program has
-"execute" rights.
-Let's test this program on the "login" node first:
-$ ./mpihello
-Hello World from Node 0.
-It seems to work, now run it on the HPC.
-qsub mpihello.pbs
-Compiling a parallel program in Intel Parallel Studio Cluster Edition
-We will now compile the same program, but using the Intel Parallel
-Studio Cluster Edition compilers. We stay in the examples directory for
-this chapter:
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json
deleted file mode 100644
index ca0d7d80669..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "compiling_your_software",
-    "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition",
-    "title_depth": 3,
-    "directory": "compiling_your_software",
-    "parent_title": "",
-    "previous_title": "compiling_your_software_paragraph_9",
-    "next_title": "compiling_your_software_paragraph_11",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt
deleted file mode 100644
index be02d069ac7..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-cd ~/examples/Compiling-and-testing-your-software-on-the-HPC
-We will compile this C/MPI -file into an executable with the Intel
-Parallel Studio Cluster Edition. First, clear the modules (purge) and
-then load the latest "intel" module:
-module purge
-module load intel
-Then, compile and list the contents of the directory again. The Intel
-equivalent of mpicc is mpiicc.
-mpiicc -o mpihello mpihello.c
-ls -l
-Note that the old "mpihello" file has been overwritten. Let's test this
-program on the "login" node first:
-$ ./mpihello
-Hello World from Node 0.
-It seems to work, now run it on the HPC.
-qsub mpihello.pbs
-Note: The AUGent only has a license for the Intel Parallel Studio Cluster
-Edition for a fixed number of users. As such, it might happen that you
-have to wait a few minutes before a floating license becomes available
-for your use.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json
deleted file mode 100644
index 808331a3f9d..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "compiling_your_software",
-    "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition",
-    "title_depth": 3,
-    "directory": "compiling_your_software",
-    "parent_title": "",
-    "previous_title": "compiling_your_software_paragraph_10",
-    "next_title": "compiling_your_software_paragraph_12",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt
deleted file mode 100644
index 1d37014a426..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-Note: The Intel Parallel Studio Cluster Edition contains equivalent
-compilers for all GNU compilers. Hereafter the overview for C, C++ and
-Fortran compilers.
-|             | Sequential Program |           | **Parallel Program (with MPI)** |           |
-|-------------|------------------------|-----------|---------------------------------|-----------|
-|             | GNU                | Intel | GNU                         | Intel |
-| C       | gcc                    | icc       | mpicc                           | mpiicc    |
-| **C++**     | g++                    | icpc      | mpicxx                          | mpiicpc   |
-| Fortran | gfortran               | ifort     | mpif90                          | mpiifort  |
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json
deleted file mode 100644
index d032428daf1..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "compiling_your_software",
-    "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition",
-    "title_depth": 3,
-    "directory": "compiling_your_software",
-    "parent_title": "",
-    "previous_title": "compiling_your_software_paragraph_11",
-    "next_title": null,
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json
deleted file mode 100644
index ec4b55c9a4d..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "compiling_your_software",
-    "subtitle": "Compiling-and-testing-your-software-on-the-HPC",
-    "title_depth": 1,
-    "directory": "compiling_your_software",
-    "parent_title": "",
-    "previous_title": null,
-    "next_title": "compiling_your_software_paragraph_2",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-testing-your-software-on-the-hpc"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt
deleted file mode 100644
index b52639b649d..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-Check the pre-installed software on the HPC
-In order to check all the available modules and their version numbers,
-which are pre-installed on the HPC enter:
-When your required application is not available on the HPC please contact
-any HPC member. Be aware of potential "License Costs". "Open Source"
-software is often preferred.
-Porting your code
-To port a software-program is to translate it from the operating system in
-which it was developed (e.g., Windows 7) to another operating system
-(e.g., Red Hat Enterprise Linux on our HPC) so that it can be used there. Porting implies some
-degree of effort, but not nearly as much as redeveloping the program in
-the new environment. It all depends on how "portable" you wrote your
-code.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json
deleted file mode 100644
index 00750c81d97..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "compiling_your_software",
-    "subtitle": "Porting-your-code",
-    "title_depth": 2,
-    "directory": "compiling_your_software",
-    "parent_title": "",
-    "previous_title": "compiling_your_software_paragraph_1",
-    "next_title": "compiling_your_software_paragraph_3",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#porting-your-code"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt
deleted file mode 100644
index f994f0bc148..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-In the simplest case the file or files may simply be copied from one
-machine to the other. However, in many cases the software is installed
-on a computer in a way, which depends upon its detailed hardware,
-software, and setup, with device drivers for particular devices, using
-installed operating system and supporting software components, and using
-different directories.
-In some cases software, usually described as "portable software" is
-specifically designed to run on different computers with compatible
-operating systems and processors without any machine-dependent
-installation; it is sufficient to transfer specified directories and
-their contents. Hardware- and software-specific information is often
-stored in configuration files in specified locations (e.g., the registry
-on machines running MS Windows).
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json
deleted file mode 100644
index 90e7d236beb..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "compiling_your_software",
-    "subtitle": "Porting-your-code",
-    "title_depth": 2,
-    "directory": "compiling_your_software",
-    "parent_title": "",
-    "previous_title": "compiling_your_software_paragraph_2",
-    "next_title": "compiling_your_software_paragraph_4",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#porting-your-code"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt
deleted file mode 100644
index f7bf4172b71..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Software, which is not portable in this sense, will have to be
-transferred with modifications to support the environment on the
-destination machine.
-Whilst programming, it would be wise to stick to certain standards
-(e.g., ISO/ANSI/POSIX). This will ease the porting of your code to other
-platforms.
-Porting your code to the RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty) platform is the responsibility of the end-user.
-Compiling and building on the HPC
-Compiling refers to the process of translating code written in some
-programming language, e.g., Fortran, C, or C++, to machine code.
-Building is similar, but includes gluing together the machine code
-resulting from different source files into an executable (or library).
-The text below guides you through some basic problems typical for small
-software projects. For larger projects it is more appropriate to use
-makefiles or even an advanced build system like CMake.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json
deleted file mode 100644
index b7c9ef0f71b..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "compiling_your_software",
-    "subtitle": "Compiling-and-building-on-the-HPC",
-    "title_depth": 2,
-    "directory": "compiling_your_software",
-    "parent_title": "",
-    "previous_title": "compiling_your_software_paragraph_3",
-    "next_title": "compiling_your_software_paragraph_5",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt
deleted file mode 100644
index 342262b9264..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-All the HPC nodes run the same version of the Operating System, i.e. RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty). So,
-it is sufficient to compile your program on any compute node. Once you
-have generated an executable with your compiler, this executable should
-be able to run on any other compute-node.
-A typical process looks like:
-1.  Copy your software to the login-node of the HPC
-2.  Start an interactive session on a compute node;
-3.  Compile it;
-4.  Test it locally;
-5.  Generate your job scripts;
-6.  Test it on the HPC
-7.  Run it (in parallel);
-We assume you've copied your software to the HPC. The next step is to request
-your private compute node.
-$ qsub -I
-qsub: waiting for job 123456 to start
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json
deleted file mode 100644
index 02a8fad0ae2..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "compiling_your_software",
-    "subtitle": "Compiling-and-building-on-the-HPC",
-    "title_depth": 2,
-    "directory": "compiling_your_software",
-    "parent_title": "",
-    "previous_title": "compiling_your_software_paragraph_4",
-    "next_title": "compiling_your_software_paragraph_6",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt
deleted file mode 100644
index 7ebde664878..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Compiling a sequential program in C
-Go to the examples for chapter 
-Compiling and testing your software on the HPC and load the 
-foss module:
-cd ~/examples/Compiling-and-testing-your-software-on-the-HPC
-module load foss
-We now list the directory and explore the contents of the "hello.c"
-program:
-$ ls -l
-total 512
--rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c
--rw-r--r-- 1 vsc40000 130 Sep 16 11:39 hello.pbs*
--rw-r--r-- 1 vsc40000 359 Sep 16 13:55 mpihello.c
--rw-r--r-- 1 vsc40000 304 Sep 16 13:55 mpihello.pbs
-/*
- * VSC        : Flemish Supercomputing Centre
- * Tutorial   : Introduction to HPC
- * Description: Print 500 numbers, whilst waiting 1 second in between
- */
-#include "stdio.h"
-int main( int argc, char *argv[] )
-{
-  int i;
-  for (i=0; i<500; i++)
-  {
-    printf("Hello #%d\n", i);
-    fflush(stdout);
-    sleep(1);
-  }
-}
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json
deleted file mode 100644
index 16942249583..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json
+++ /dev/null
@@ -1,14 +0,0 @@
-{
-    "main_title": "compiling_your_software",
-    "subtitle": "Compiling-a-sequential-program-in-C",
-    "title_depth": 3,
-    "directory": "compiling_your_software",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc"
-    },
-    "parent_title": "",
-    "previous_title": "compiling_your_software_paragraph_5",
-    "next_title": "compiling_your_software_paragraph_7",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt
deleted file mode 100644
index 1d58d0d6ae4..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-The "hello.c" program is a simple source file, written in C. It'll print
-500 times "Hello #&lt;num&gt;", and waits one second between 2 printouts.
-We first need to compile this C-file into an executable with the
-gcc-compiler.
-First, check the command line options for *"gcc" (GNU C-Compiler)*, then
-we compile. the O2 option enables a moderate level of optimization when compiling the code. 
-It instructs the compiler to optimize the code for better performance without significantly increasing compilation time.
-Finally, list the contents of the directory again:
-$ gcc -help
-$ gcc -O2 -o hello hello.c
-$ ls -l
-total 512
--rwxrwxr-x 1 vsc40000 7116 Sep 16 11:43 hello*
--rw-r--r-- 1 vsc40000  214 Sep 16 09:42 hello.c
--rwxr-xr-x 1 vsc40000  130 Sep 16 11:39 hello.pbs*
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json
deleted file mode 100644
index e5f3161c3f2..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "compiling_your_software",
-    "subtitle": "Compiling-a-sequential-program-in-C",
-    "title_depth": 3,
-    "directory": "compiling_your_software",
-    "parent_title": "",
-    "previous_title": "compiling_your_software_paragraph_6",
-    "next_title": "compiling_your_software_paragraph_8",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt
deleted file mode 100644
index 5ca5de1e6d4..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-A new file "hello" has been created. Note that this file has "execute"
-rights, i.e., it is an executable. More often than not, calling gcc --
-or any other compiler for that matter -- will provide you with a list of
-errors and warnings referring to mistakes the programmer made, such as
-typos, syntax errors. You will have to correct them first in order to
-make the code compile. Warnings pinpoint less crucial issues that may
-relate to performance problems, using unsafe or obsolete language
-features, etc. It is good practice to remove all warnings from a
-compilation process, even if they seem unimportant so that a code change
-that produces a warning does not go unnoticed.
-Let's test this program on the local compute node, which is at your
-disposal after the qsub --I command:
-$ ./hello
-Hello #0
-Hello #1
-Hello #2
-Hello #3
-Hello #4
-...
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json
deleted file mode 100644
index 942949951d1..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "compiling_your_software",
-    "subtitle": "Compiling-a-sequential-program-in-C",
-    "title_depth": 3,
-    "directory": "compiling_your_software",
-    "parent_title": "",
-    "previous_title": "compiling_your_software_paragraph_7",
-    "next_title": "compiling_your_software_paragraph_9",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt
deleted file mode 100644
index 28982d2bd95..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-It seems to work, now run it on the HPC
-qsub hello.pbs
-Compiling a parallel program in C/MPI
-cd ~/examples/Compiling-and-testing-your-software-on-the-HPC
-List the directory and explore the contents of the "mpihello.c"
-program:
-$ ls -l
-total 512
-total 512
--rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c
--rw-r--r-- 1 vsc40000 130 Sep 16 11:39 hello.pbs*
--rw-r--r-- 1 vsc40000 359 Sep 16 13:55 mpihello.c
--rw-r--r-- 1 vsc40000 304 Sep 16 13:55 mpihello.pbs
-/*
- * VSC        : Flemish Supercomputing Centre
- * Tutorial   : Introduction to HPC
- * Description: Example program, to compile with MPI
- */
-#include <stdio.h>
-#include <mpi.h>
-main(int argc, char **argv)
-{
-  int node, i, j;
-  float f;
-  MPI_Init(&argc,&argv);
-  MPI_Comm_rank(MPI_COMM_WORLD, &node);
-   
-  printf("Hello World from Node %d.\n", node);
-  for (i=0; i<=100000; i++)
-    f=i*2.718281828*i+i+i*3.141592654;
-  MPI_Finalize();
-}
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json
deleted file mode 100644
index fe51e423a96..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "main_title": "compiling_your_software",
-    "subtitle": "Compiling-a-parallel-program-in-CMPI",
-    "title_depth": 3,
-    "directory": "compiling_your_software",
-    "parent_title": "",
-    "previous_title": "compiling_your_software_paragraph_8",
-    "next_title": "compiling_your_software_paragraph_10",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-cmpi"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt
deleted file mode 100644
index bc5a1f80140..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-Connecting to the HPC infrastructure
-Before you can really start using the HPC clusters, there are several things
-you need to do or know:
-1.  You need to log on to the cluster using an SSH client to one of
-    the login nodes or by using the HPC web portal.
-    This will give you command-line access.
-    A standard web browser like Firefox or Chrome for the web portal will suffice.
-2.  Before you can do some work, you'll have to transfer the files
-    that you need from your desktop computer to the cluster. At the end
-    of a job, you might want to transfer some files back.
-3.  Optionally, if you wish to use programs with a **graphical user
-    interface**, you will need an X-server on your client system and log
-    in to the login nodes with X-forwarding enabled.
-4.  Often several versions of software packages and libraries are
-    installed, so you need to select the ones you need. To manage
-    different versions efficiently, the VSC clusters use so-called
-    modules, so you will need to select and load the modules that
-    you need.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt
deleted file mode 100644
index 5c715d218a1..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-You can exit the connection at anytime by entering:
-$ exit
-logout
-Connection to login.hpc.ugent.be closed.
- tip "tip: Setting your Language right"
-    You may encounter a warning message similar to the following one during connecting:
-    perl: warning: Setting locale failed.
-    perl: warning: Please check that your locale settings:
-    LANGUAGE = (unset),
-    LC_ALL = (unset),
-    LC_CTYPE = "UTF-8",
-    LANG = (unset)
-        are supported and installed on your system.
-    perl: warning: Falling back to the standard locale ("C").
-    or any other error message complaining about the locale.
-    This means that the correct "locale" has not yet been properly specified on your local machine. Try:
-    LANG=
-    LC_COLLATE="C"
-    LC_CTYPE="UTF-8"
-    LC_MESSAGES="C"
-    LC_MONETARY="C"
-    LC_NUMERIC="C"
-    LC_TIME="C"
-    LC_ALL=
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json
deleted file mode 100644
index 96a1f9cee80..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 2,
-    "directory": "connecting",
-    "parent_title": "",
-    "previous_title": "connecting_paragraph_9",
-    "next_title": "connecting_paragraph_11",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt
deleted file mode 100644
index df00d4ed2a4..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-Fast file transfer for large datasets
-See the section on rsync in chapter 5 of the Linux intro manual.
-Changing login nodes
-It can be useful to have control over which login node you are on. However, when you connect to the HPC (High-Performance Computing) system, you are directed to a random login node, which might not be the one where you already have an active session. To address this, there is a way to manually switch your active login node.
-For instance, if you want to switch to the login node named gligar07.gastly.os, you can use the following command while you are connected to the gligar08.gastly.os login node on the HPC:
-ssh gligar07.gastly.os
-This is also possible the other way around.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json
deleted file mode 100644
index 74ea0125d71..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Changing-login-nodes",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 2,
-    "directory": "connecting",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/linux-tutorial/uploading_files/#copying-faster-with-rsync"
-    },
-    "parent_title": "",
-    "previous_title": "connecting_paragraph_14",
-    "next_title": "connecting_paragraph_16",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt
deleted file mode 100644
index dd4f3269fb5..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-If you want to find out which login host you are connected to, you can use the hostname command.
-$ hostname
-gligar07.gastly.os
-$ ssh gligar08.gastly.os
-$ hostname
-gligar08.gastly.os
-Rather than always starting a new session on the HPC, you can also use a terminal multiplexer like screen or tmux.
-These can make sessions that 'survives' across disconnects.
-You can find more information on how to use these tools here (or on other online sources):
-- screen
-- tmux
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json
deleted file mode 100644
index 623be877f5b..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Changing-login-nodes",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 2,
-    "directory": "connecting",
-    "links": {
-        "0": "https://www.howtogeek.com/662422/how-to-use-linuxs-screen-command/",
-        "1": "https://www.howtogeek.com/671422/how-to-use-tmux-on-linux-and-why-its-better-than-screen/"
-    },
-    "parent_title": "",
-    "previous_title": "connecting_paragraph_15",
-    "next_title": null,
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json
deleted file mode 100644
index 783e60c1ab5..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Connecting-to-the-HPC-infrastructure",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 1,
-    "directory": "connecting",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/web_portal"
-    },
-    "parent_title": "",
-    "previous_title": null,
-    "next_title": "connecting_paragraph_2",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/connecting/#connecting-to-the-hpc-infrastructure"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt
deleted file mode 100644
index 49c4572f3b2..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-Connection restrictions
-Since March 20th 2020, restrictions are in place that limit from where
-you can connect to the VSC HPC infrastructure, in response to security
-incidents involving several European HPC centres.
-VSC login nodes are only directly accessible from within university
-networks, and from (most) Belgian commercial internet providers.
-All other IP domains are blocked by default. If you are connecting from
-an IP address that is not allowed direct access, you have the following
-options to get access to VSC login nodes:
--   Use an VPN connection to connect to UGent the network (recommended). See <https://helpdesk.ugent.be/vpn/en/> for more information. 
--   Whitelist your IP address automatically by accessing
-    <https://firewall.vscentrum.be> and log in with your UGent account.
-    -   While this web connection is active new SSH sessions can be
-        started.
-    -   Active SSH sessions will remain active even when this web page
-        is closed.
--   Contact your HPC support team (via hpc@ugent.be) and ask them to whitelist your
-    IP range (e.g., for industry access, automated processes).
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json
deleted file mode 100644
index 10f3e042d9a..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Connection-restrictions",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 2,
-    "directory": "connecting",
-    "parent_title": "",
-    "previous_title": "connecting_paragraph_1",
-    "next_title": "connecting_paragraph_3",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/connecting/#connection-restrictions"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt
deleted file mode 100644
index db490973b7f..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-Trying to establish an SSH connection from an IP address that does not
-adhere to these restrictions will result in an immediate failure to
-connect, with an error message like:
-ssh_exchange_identification: read: Connection reset by peer
-First Time connection to the HPC infrastructure
-The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH.
-If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal.
-If you have any issues connecting to the HPC after you've followed these
-steps, see Issues connecting to login node to troubleshoot.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json
deleted file mode 100644
index 8d6b1696e08..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 2,
-    "directory": "connecting",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/web_portal",
-        "1": "https://docs.hpc.ugent.be/troubleshooting/#issues-connecting-to-login-node"
-    },
-    "parent_title": "",
-    "previous_title": "connecting_paragraph_2",
-    "next_title": "connecting_paragraph_4",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt
deleted file mode 100644
index 862e6952252..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Congratulations, you're on the HPC infrastructure now!
-To find out where you have landed you can print the current working directory:
-$ pwd
-/user/home/gent/vsc400/vsc40000
-Your new private home directory is "/user/home/gent/vsc400/vsc40000". Here you can create your own
-subdirectory structure, copy and prepare your applications, compile and
-test them and submit your jobs on the HPC.
-$ cd /apps/gent/tutorials
-$ ls
-Intro-HPC/
-This directory currently contains all training material for the Introduction to the HPC. More
-relevant training material to work with the HPC can always be added later in
-this directory.
-You can now explore the content of this directory with the "ls --l" (lists long) and the "cd" (change directory) commands:
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json
deleted file mode 100644
index 66b2a89fbb1..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 2,
-    "directory": "connecting",
-    "parent_title": "",
-    "previous_title": "connecting_paragraph_5",
-    "next_title": "connecting_paragraph_7",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt
deleted file mode 100644
index aa590b9b269..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-As we are interested in the use of the HPC, move further to Intro-HPC and explore the
-contents up to 2 levels deep:
-$ cd Intro-HPC
-$ tree -L 2
-.
-'-- examples
-    |-- Compiling-and-testing-your-software-on-the-HPC
-    |-- Fine-tuning-Job-Specifications
-    |-- Multi-core-jobs-Parallel-Computing
-    |-- Multi-job-submission
-    |-- Program-examples
-    |-- Running-batch-jobs
-    |-- Running-jobs-with-input
-    |-- Running-jobs-with-input-output-data
-    |-- example.pbs
-    '-- example.sh
-9 directories, 5 files
-This directory contains:
-1.  This HPC Tutorial (in either a Mac, Linux or      Windows version).
-2.  An examples subdirectory, containing all the examples that you need in this
-    Tutorial, as well as examples that might be useful for your specific applications.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json
deleted file mode 100644
index 6e3f90fbe8a..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 2,
-    "directory": "connecting",
-    "parent_title": "",
-    "previous_title": "connecting_paragraph_6",
-    "next_title": "connecting_paragraph_8",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt
deleted file mode 100644
index 634df6034b1..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-cd examples
- tip
-    Typing cd ex followed by tab (the Tab-key) will generate the cd examples
-    command. Command-line completion (also tab completion) is a common feature of the bash command
-    line interpreter, in which the program automatically fills in partially
-    typed commands.
- tip
-    For more exhaustive tutorials about Linux usage, see Appendix Useful Linux Commands
-The first action is to copy the contents of the HPC examples directory to
-your home directory, so that you have your own personal copy and that
-you can start using the examples. The "-r" option of the copy command
-will also copy the contents of the sub-directories "recursively".
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json
deleted file mode 100644
index 38f265cfdcd..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 2,
-    "directory": "connecting",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/useful_linux_commands"
-    },
-    "parent_title": "",
-    "previous_title": "connecting_paragraph_7",
-    "next_title": "connecting_paragraph_9",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt
deleted file mode 100644
index ad2fee7457f..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-cp -r /apps/gent/tutorials/Intro-HPC/examples ~/
-Go to your home directory, check your own private examples directory, ... and start working.
-cd
-ls -l
-Upon connecting you will see a login message containing your last login time stamp and a basic overview of the current cluster utilisation.
-Last login: Thu Mar 18 13:15:09 2021 from gligarha02.gastly.os
- STEVIN HPC-UGent infrastructure status on Mon, 19 Feb 2024 10:00:01
-      cluster         - full - free -  part - total - running - queued
-                        nodes  nodes   free   nodes   jobs      jobs
- -------------------------------------------------------------------------
-           skitty          39      0     26      68      1839     5588
-           joltik           6      0      1      10        29       18
-            doduo          22      0     75     128      1397    11933
-         accelgor           4      3      2       9        18        1
-          donphan           0      0     16      16        16       13
-          gallade           2      0      5      16        19      136
-For a full view of the current loads and queues see:
-https://hpc.ugent.be/clusterstate/
-Updates on current system status and planned maintenance can be found on https://www.ugent.be/hpc/en/infrastructure/status
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json
deleted file mode 100644
index bd1d462e614..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 2,
-    "directory": "connecting",
-    "parent_title": "",
-    "previous_title": "connecting_paragraph_8",
-    "next_title": "connecting_paragraph_10",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt
deleted file mode 100644
index dfc59211792..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-Adding multiple SSH public keys (optional)
-In case you are connecting from different computers to the login nodes,
-it is advised to use separate SSH public keys to do so. You should
-follow these steps.
-1.  Create a new public/private SSH key pair from the new computer.
-    Repeat the process described in
-    section Generate a public/private key pair with OpenSSH.
-2.  Go to <https://account.vscentrum.be/django/account/edit>
-3.  Upload the new SSH public key using the Add public key section. Make sure that your
-    public key is actually saved, because a public key will be refused
-    if it is too short, wrong type, or in a wrong format.
-4.  (optional) If you lost your key, you can delete the old key on the
-    same page. You should keep at least one valid public SSH key in your
-    account.
-5.  Take into account that it will take some time before the new SSH
-    public key is active in your account on the system; waiting for
-    15-30 minutes should be sufficient.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json
deleted file mode 100644
index ffdeaf550e0..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Adding-multiple-SSH-public-keys-(optional)",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Applying-for-the-account",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair-with-openssh"
-    },
-    "previous_title": "account_paragraph_10",
-    "next_title": "account_paragraph_12",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#adding-multiple-ssh-public-keys-optional"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt
deleted file mode 100644
index caaaea5ee91..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-How do SSH keys work
-Launch a terminal from your desktop's application menu and you will see
-the bash shell. There are other shells, but most Linux distributions use
-bash by default.
-Test OpenSSH
-Secure Shell (ssh) is a cryptographic network protocol for secure data
-communication, remote command-line login, remote command execution, and
-other secure network services between two networked computers. In short,
-ssh provides a secure connection between 2 computers via insecure
-channels (Network, Internet, telephone lines, ...).
-"Secure" means that:
-1.  the User is authenticated to the System; and
-2.  the System is authenticated to the User; and
-3.  all data is encrypted during transfer.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json
deleted file mode 100644
index 7654a65253a..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Test-OpenSSH",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_paragraph_4",
-    "next_title": "account_linux_paragraph_5.2",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#test-openssh"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt
deleted file mode 100644
index 318f913fba3..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-OpenSSH is a FREE implementation of the SSH connectivity protocol.  comes
-with its own implementation of OpenSSH, so you don't need to install any
-third-party software to use it. Just open a terminal window and jump in!
-On all popular Linux distributions, the OpenSSH software is readily
-available, and most often installed by default. You can check whether
-the OpenSSH software is installed by opening a terminal and typing:
-$ ssh -V
-OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017
-To access the clusters and transfer your files, you will use the
-following commands:
-1.  ssh-keygen: to generate the SSH key pair (public + private key);
-2.  ssh: to open a shell on a remote machine;
-3.  sftp: a secure equivalent of ftp;
-4.  scp: a secure equivalent of the remote copy command rcp.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json
deleted file mode 100644
index 32f1120307f..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Test-OpenSSH",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_linux_paragraph_5.1",
-    "next_title": "account_linux_paragraph_5.3",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#test-openssh"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt
deleted file mode 100644
index 5df90a3dd7c..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-Generate a public/private key pair with OpenSSH
-A key pair might already be present in the default location inside your
-home directory. Therefore, we first check if a key is available with the
-"list short" ("ls") command:
-ls ~/.ssh
-If a key-pair is already available, you would normally get:
-authorized_keys     id_rsa      id_rsa.pub      known_hosts
-Otherwise, the command will show:
-ls: .ssh: No such file or directory
-You can recognise a public/private key pair when a pair of files has the
-same name except for the extension ".pub" added to one of them. In this
-particular case, the private key is "id_rsa" and public key is
-"id_rsa.pub". You may have multiple keys (not necessarily in the
-directory "~/.ssh") if you or your operating system requires this. Be
-aware that your existing key pair might be too short, or not the right
-type.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json
deleted file mode 100644
index 722ba1a2ad4..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_linux_paragraph_5.2",
-    "next_title": "account_linux_paragraph_5.4",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt
deleted file mode 100644
index d29d61d27d9..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-You will need to generate a new key pair, when:
-1.  you don't have a key pair yet
-2.  you forgot the passphrase protecting your private key
-3.  your private key was compromised
-4.  your key pair is too short or not the right type
-For extra security, the private key itself can be encrypted using a
-"passphrase", to prevent anyone from using your private key even when
-they manage to copy it. You have to "unlock" the private key by typing
-the passphrase. Be sure to never give away your private key, it is
-private and should stay private. You should not even copy it to one of
-your other machines, instead, you should create a new public/private key
-pair for each machine.
-ssh-keygen -t rsa -b 4096
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json
deleted file mode 100644
index 4f65f6ebf36..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_linux_paragraph_5.3",
-    "next_title": "account_linux_paragraph_5.5",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt
deleted file mode 100644
index 78c142e82e0..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-This will ask you for a file name to store the private and public key,
-and a passphrase to protect your private key. It needs to be emphasised
-that you really should choose the passphrase wisely! The system will ask
-you for it every time you want to use the private key that is every time
-you want to access the cluster or transfer your files.
-Without your key pair, you won't be able to apply for a personal VSC account.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json
deleted file mode 100644
index 468fb5d0938..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_linux_paragraph_5.4",
-    "next_title": "account_paragraph_6",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt
deleted file mode 100644
index c3b395b5296..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt
+++ /dev/null
@@ -1 +0,0 @@
-Using an SSH agent (optional)
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json
deleted file mode 100644
index fb82c40a7d7..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Using-an-SSH-agent-(optional",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_paragraph_5",
-    "next_title": "account_paragraph_7",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt
deleted file mode 100644
index 8e8429c1642..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Using an SSH agent (optional)
-Most recent Unix derivatives include by default an SSH agent ("gnome-keyring-daemon" in most cases)  
-to keep and manage the user SSH keys. If you use one of these derivatives you must include the new keys into
-the SSH manager keyring to be able to connect to the HPC cluster. If
-not, SSH client will display an error message (see Connecting) similar to this:
-Agent admitted failure to sign using the key. 
-Permission denied (publickey,gssapi-keyex,gssapi-with-mic).
-This could be fixed using the ssh-add command. You can include the new
-private keys' identities in your keyring with:
-ssh-add
- tip
-    Without extra options ssh-add adds any key located at $HOME/.ssh
-    directory, but you can specify the private key location path as
-    argument, as example: ssh-add /path/to/my/id_rsa.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json
deleted file mode 100644
index 2b3633d71e7..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Using-an-SSH-agent-(optional)",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/connecting"
-    },
-    "previous_title": "account_paragraph_6",
-    "next_title": "account_linux_paragraph_7.2",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt
deleted file mode 100644
index c227dbbb6e2..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-Check that your key is available from the keyring with:
-ssh-add -l
-After these changes the key agent will keep your SSH key to connect to
-the clusters as usual.
- tip
-    You should execute ssh-add command again if you generate a new SSH
-    key.
-Visit <https://wiki.gnome.org/Projects/GnomeKeyring/Ssh> for more information.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json
deleted file mode 100644
index de9700c7a5b..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Using-an-SSH-agent-(optional)",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_linux_paragraph_7.1",
-    "next_title": "account_paragraph_8",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt
deleted file mode 100644
index 815c414e059..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-Applying for the account
-After you log in using your UGent login and password, you will be asked to
-upload the file that contains your public key, i.e., the file
-"id_rsa.pub" which you have generated earlier. Make sure that your
-public key is actually accepted for upload, because if it is in a wrong
-format, wrong type or too short, then it will be refused.
-This file has been stored in the directory "~/.ssh/".
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json
deleted file mode 100644
index 31c14d853b3..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Applying-for-the-account",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 2,
-    "directory": "account",
-    "parent_title": "account",
-    "previous_title": "account_paragraph_8",
-    "next_title": "account_paragraph_10",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/account/#applying-for-the-account"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt
deleted file mode 100644
index 1d912924535..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-First Time connection to the HPC infrastructure
-    A locale is a set of parameters that defines the user's language, country and
-    any special variant preferences that the user wants to see in their user
-    interface. Usually a locale identifier consists of at least a language
-    identifier and a region identifier.
-     Note
-        If you try to set a non-supported locale, then it will be automatically
-        set to the default. Currently the default is en_US.UFT-8 or en_US,
-        depending on whether your originally (non-supported) locale was UTF-8 or not.
-    Open the .bashrc on your local machine with your favourite editor and
-    add the following lines:
-    
-    $ nano ~/.bashrc
-    ...
-    export LANGUAGE="en_US.UTF-8"
-    export LC_ALL="en_US.UTF-8"
-    export LC_CTYPE="en_US.UTF-8"
-    export LANG="en_US.UTF-8"
-    ...
-    
-     tip "tip: vi"
-        To start entering text in vi: move to the place you want to start
-        entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq"
-        To exit vi without saving your changes, enter ""ESC":q!"
-    
-    
-    or alternatively (if you are not comfortable with the Linux editors),
-    again on your local machine:
-    
-    echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile
-    echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile
-    echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile
-    echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile
-    
-    You can now log out, open a new terminal/shell on your local machine and
-    reconnect to the login node, and you should not get these warnings anymore.
-    
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json
deleted file mode 100644
index ef14b084e5f..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 2,
-    "directory": "connecting",
-    "parent_title": "Connecting-to-the-HPC-infrastructure",
-    "previous_title": "connecting_paragraph_10",
-    "next_title": "connecting_paragraph_12",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt
deleted file mode 100644
index d872c89a0f8..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Transfer Files to/from the HPC
-Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back.
-The preferred way to transfer files is by using an scp or sftp via the
-secure OpenSSH protocol.  ships with an implementation of OpenSSH, so you
-don't need to install any third-party software to use it. Just open a
-terminal window and jump in!
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json
deleted file mode 100644
index 081156a5d16..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Transfer-Files-tofrom-the-HPC",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 2,
-    "directory": "connecting",
-    "parent_title": "Connecting-to-the-HPC-infrastructure",
-    "previous_title": "connecting_paragraph_11",
-    "next_title": "connecting_paragraph_13",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#transfer-files-tofrom-the-hpc"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt
deleted file mode 100644
index 8d0031fcca9..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-Transfer Files tofrom the HPC
-Using scp
-Secure copy or SCP is a tool (command) for securely transferring files between a local
-host (= your computer) and a remote host (the HPC). It is based on the
-Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e.,
-copy) command, but can copy files to or from remote machines.
-It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if
-you have symlinks to them in your home directory. See 
-the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux
- for how to do this.
-Open an additional terminal window and check that you're working on your
-local machine.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json
deleted file mode 100644
index 6b70790e1e3..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Using-scp",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch"
-    },
-    "previous_title": "connecting_paragraph_12",
-    "next_title": "connecting_linux_paragraph_13.2",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt
deleted file mode 100644
index f1da0677a67..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-$ hostname
-<local-machine-name>
-If you're still using the terminal that is connected to the HPC, close the
-connection by typing "exit" in the terminal window.
-For example, we will copy the (local) file "localfile.txt" to your
-home directory on the HPC cluster. We first generate a small dummy
-"localfile.txt", which contains the word "Hello". Use your own VSC
-account, which is something like "vsc40000". Don't forget the colon (:) at the
-end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your
-local filesystem. You can even specify where to save the file on the
-remote filesystem by putting a path after the colon.
-$ echo "Hello" > localfile.txt
-$ ls -l 
-...
--rw-r--r-- 1 user  staff   6 Sep 18 09:37 localfile.txt
-$ scp localfile.txt vsc40000@login.hpc.ugent.be:
-localfile.txt     100%   6     0.0KB/s     00:00
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json
deleted file mode 100644
index 43affa4e36c..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Using-scp",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_linux_paragraph_13.1",
-    "next_title": "connecting_linux_paragraph_13.3",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt
deleted file mode 100644
index 9585900e356..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Connect to the HPC via another terminal, print the working directory (to
-make sure you're in the home directory) and check whether the file has
-arrived:
-$ pwd
-/user/home/gent/vsc400/vsc40000
-$ ls -l 
-total 1536
-drwxrwxr-x 2
-drwxrwxr-x 2
-drwxrwxr-x 10
--rw-r--r-- 1
-$ cat localfile.txt
-Hello
-The scp command can also be used to copy files from the cluster to your
-local machine. Let us copy the remote file "intro-HPC--Gent.pdf" from your "docs"
-subdirectory on the cluster to your local computer.
-First, we will confirm that the file is indeed in the "docs"
-subdirectory. In the terminal on the login node, enter:
-$ cd ~/docs
-$ ls -l
-total 1536
--rw-r--r-- 1 vsc40000 Sep 11 09:53 intro-HPC--Gent.pdf
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json
deleted file mode 100644
index ccc74bb5b94..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Using-scp",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_linux_paragraph_13.2",
-    "next_title": "connecting_linux_paragraph_13.4",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt
deleted file mode 100644
index d09b69552ef..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Now we will copy the file to the local machine. On the terminal on your
-own local computer, enter:
-$ scp vsc40000@login.hpc.ugent.be:./docs/intro-HPC--Gent.pdf .
-intro-HPC--Gent.pdf 100% 725KB 724.6KB/s 00:01
-$ ls -l
-total 899
--rw-r--r-- 1 user staff 741995 Sep 18 09:53
--rw-r--r-- 1 user staff      6 Sep 18 09:37 localfile.txt
-The file has been copied from the HPC to your local computer.
-It's also possible to copy entire directories (and their contents) with
-the -r flag. For example, if we want to copy the local directory
-dataset to $VSC_SCRATCH, we can use the following command (assuming
-you've created the scratch symlink):
-scp -r dataset vsc40000@login.hpc.ugent.be:scratch
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json
deleted file mode 100644
index 9ffcc4121f4..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Using-scp",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_linux_paragraph_13.3",
-    "next_title": "connecting_linux_paragraph_13.5",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt
deleted file mode 100644
index 532d57bb4a5..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-If you don't use the -r option to copy a directory, you will run into
-the following error:
-$ scp dataset vsc40000@login.hpc.ugent.be:scratch
-dataset: not a regular file
-Using sftp
-The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file
-transfer and file management functionalities over any reliable data
-stream. It was designed as an extension of the Secure Shell protocol
-(SSH) version 2.0. This protocol assumes that it is run over a secure
-channel, such as SSH, that the server has already authenticated the
-client, and that the identity of the client user is available to the
-protocol.
-The sftp is an equivalent of the ftp command, with the difference that
-it uses the secure ssh protocol to connect to the clusters.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json
deleted file mode 100644
index 8e3b4056b6b..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Using-sftp",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_linux_paragraph_13.4",
-    "next_title": "connecting_linux_paragraph_13.6",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt
deleted file mode 100644
index 1ef13b80c6f..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-One easy way of starting a sftp session is
-sftp vsc40000@login.hpc.ugent.be
-Typical and popular commands inside an sftp session are:
-|                       |                                                                                      |
-|:--------------------------|:-------------------------------------------------------------------------------------|
-| cd ~/exmples/fibo     | Move to the examples/fibo subdirectory on the (i.e., the HPC remote machine)   |
-| ls                    | Get a list of the files in the current directory on the HPC.                   |
-| get fibo.py           | Copy the file "fibo.py" from the HPC                                           |
-| get tutorial/HPC.pdf  | Copy the file "HPC.pdf" from the HPC, which is in the "tutorial" subdirectory. |
-| lcd test              | Move to the "test" subdirectory on your local machine.                               |
-| lcd ..                | Move up one level in the local directory.                                            |
-| lls                   | Get local directory listing.                                                         |
-| put test.py           | Copy the local file test.py to the HPC.                                        |
-| put test1.py test2.py | Copy the local file test1.py to the and rename it to test2.py.                       |
-| bye                   | Quit the sftp session                                                                |
-| **mget *.cc**             | Copy all the remote files with extension ".cc" to the local directory.               |
-| **mput *.h**              | Copy all the local files with extension ".h" to the HPC.                       |
-|                       |                                                                                      |
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json
deleted file mode 100644
index c7fe6bf6a44..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Using-sftp",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/",
-        "1": "https://docs.hpc.ugent.be/"
-    },
-    "previous_title": "connecting_linux_paragraph_13.5",
-    "next_title": "connecting_linux_paragraph_13.7",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt
deleted file mode 100644
index a0496edfb14..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-Transfer Files tofrom the HPC
-Using a GUI
-If you prefer a GUI to transfer files back and forth to the HPC, you can
-use your file browser. Open your file browser and press
-++"Ctrl"+"l"++
-This should open up a address bar where you can enter a URL.
-Alternatively, look for the "connect to server" option in your file
-browsers menu.
-Enter: sftp://vsc40000@login.hpc.ugent.be/ and press enter.
-You should now be able to browse files on the HPC in your file browser.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json
deleted file mode 100644
index e3c48fe4829..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Using-a-GUI",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_paragraph_13",
-    "next_title": "connecting_paragraph_15",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-a-gui"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt
deleted file mode 100644
index 27ae3fb7bd4..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-First Time connection to the HPC infrastructure
-Connect
-Open up a terminal and enter the following command to connect to the HPC.
-ssh vsc40000@login.hpc.ugent.be
-Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login
-node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command.
-The first time you make a connection to the login node, you will be
-asked to verify the authenticity of the login node. Please check
-Warning message when first connecting to new host on how to do this.
-A possible error message you can get if you previously saved your
-private key somewhere else than the default location
-($HOME/.ssh/id_rsa):
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json
deleted file mode 100644
index 66c5dc4aeff..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Connect",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host"
-    },
-    "previous_title": "connecting_paragraph_4",
-    "next_title": "connecting_linux_paragraph_5.2",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#connect"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt
deleted file mode 100644
index be01e09bba0..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-Permission denied (publickey,gssapi-keyex,gssapi-with-mic).
-In this case, use the -i option for the ssh command to specify the
-location of your private key. For example:
-ssh -i /home/example/my_keys
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json
deleted file mode 100644
index 21b63518804..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Connect",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
-    "previous_title": "connecting_linux_paragraph_5.1",
-    "next_title": "connecting_paragraph_6",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#connect"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt
deleted file mode 100644
index dfc59211792..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-Adding multiple SSH public keys (optional)
-In case you are connecting from different computers to the login nodes,
-it is advised to use separate SSH public keys to do so. You should
-follow these steps.
-1.  Create a new public/private SSH key pair from the new computer.
-    Repeat the process described in
-    section Generate a public/private key pair with OpenSSH.
-2.  Go to <https://account.vscentrum.be/django/account/edit>
-3.  Upload the new SSH public key using the Add public key section. Make sure that your
-    public key is actually saved, because a public key will be refused
-    if it is too short, wrong type, or in a wrong format.
-4.  (optional) If you lost your key, you can delete the old key on the
-    same page. You should keep at least one valid public SSH key in your
-    account.
-5.  Take into account that it will take some time before the new SSH
-    public key is active in your account on the system; waiting for
-    15-30 minutes should be sufficient.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json
deleted file mode 100644
index d9d3c33f876..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Adding-multiple-SSH-public-keys-(optional)",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Applying-for-the-account",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair-with-openssh"
-    },
-    "previous_title": "account_paragraph_10",
-    "next_title": "account_paragraph_12",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#adding-multiple-ssh-public-keys-optional"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt
deleted file mode 100644
index d96c80b42a2..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-How do SSH keys work
-To open a Terminal window in macOS, open the Finder and choose
-*\>\> Applications \> Utilities \> Terminal*
-Before requesting an account, you need to generate a pair of ssh keys.
-One popular way to do this on  is using the OpenSSH client included with , which you can then also use to log on to the clusters.
-Test OpenSSH
-Secure Shell (ssh) is a cryptographic network protocol for secure data
-communication, remote command-line login, remote command execution, and
-other secure network services between two networked computers. In short,
-ssh provides a secure connection between 2 computers via insecure
-channels (Network, Internet, telephone lines, ...).
-"Secure" means that:
-1.  the User is authenticated to the System; and
-2.  the System is authenticated to the User; and
-3.  all data is encrypted during transfer.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json
deleted file mode 100644
index 028d9d25f7f..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Test-OpenSSH",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_paragraph_4",
-    "next_title": "account_macos_paragraph_5.2",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#test-openssh"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt
deleted file mode 100644
index 318f913fba3..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-OpenSSH is a FREE implementation of the SSH connectivity protocol.  comes
-with its own implementation of OpenSSH, so you don't need to install any
-third-party software to use it. Just open a terminal window and jump in!
-On all popular Linux distributions, the OpenSSH software is readily
-available, and most often installed by default. You can check whether
-the OpenSSH software is installed by opening a terminal and typing:
-$ ssh -V
-OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017
-To access the clusters and transfer your files, you will use the
-following commands:
-1.  ssh-keygen: to generate the SSH key pair (public + private key);
-2.  ssh: to open a shell on a remote machine;
-3.  sftp: a secure equivalent of ftp;
-4.  scp: a secure equivalent of the remote copy command rcp.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json
deleted file mode 100644
index dfec6f6fd5a..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Test-OpenSSH",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_macos_paragraph_5.1",
-    "next_title": "account_macos_paragraph_5.3",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#test-openssh"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt
deleted file mode 100644
index 5df90a3dd7c..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-Generate a public/private key pair with OpenSSH
-A key pair might already be present in the default location inside your
-home directory. Therefore, we first check if a key is available with the
-"list short" ("ls") command:
-ls ~/.ssh
-If a key-pair is already available, you would normally get:
-authorized_keys     id_rsa      id_rsa.pub      known_hosts
-Otherwise, the command will show:
-ls: .ssh: No such file or directory
-You can recognise a public/private key pair when a pair of files has the
-same name except for the extension ".pub" added to one of them. In this
-particular case, the private key is "id_rsa" and public key is
-"id_rsa.pub". You may have multiple keys (not necessarily in the
-directory "~/.ssh") if you or your operating system requires this. Be
-aware that your existing key pair might be too short, or not the right
-type.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json
deleted file mode 100644
index 5a10e780b45..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_macos_paragraph_5.2",
-    "next_title": "account_macos_paragraph_5.4",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt
deleted file mode 100644
index d29d61d27d9..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-You will need to generate a new key pair, when:
-1.  you don't have a key pair yet
-2.  you forgot the passphrase protecting your private key
-3.  your private key was compromised
-4.  your key pair is too short or not the right type
-For extra security, the private key itself can be encrypted using a
-"passphrase", to prevent anyone from using your private key even when
-they manage to copy it. You have to "unlock" the private key by typing
-the passphrase. Be sure to never give away your private key, it is
-private and should stay private. You should not even copy it to one of
-your other machines, instead, you should create a new public/private key
-pair for each machine.
-ssh-keygen -t rsa -b 4096
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json
deleted file mode 100644
index 8da465c1f24..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_macos_paragraph_5.3",
-    "next_title": "account_macos_paragraph_5.5",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt
deleted file mode 100644
index 78c142e82e0..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-This will ask you for a file name to store the private and public key,
-and a passphrase to protect your private key. It needs to be emphasised
-that you really should choose the passphrase wisely! The system will ask
-you for it every time you want to use the private key that is every time
-you want to access the cluster or transfer your files.
-Without your key pair, you won't be able to apply for a personal VSC account.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json
deleted file mode 100644
index 9d6f7b1a741..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_macos_paragraph_5.4",
-    "next_title": "account_paragraph_6",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt
deleted file mode 100644
index c3b395b5296..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt
+++ /dev/null
@@ -1 +0,0 @@
-Using an SSH agent (optional)
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json
deleted file mode 100644
index 17a34a2f80b..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Using-an-SSH-agent-(optional",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_paragraph_5",
-    "next_title": "account_paragraph_7",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt
deleted file mode 100644
index 1069ebd9fbd..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Using an SSH agent (optional)
-Most recent Unix derivatives include by default an SSH agent 
-to keep and manage the user SSH keys. If you use one of these derivatives you must include the new keys into
-the SSH manager keyring to be able to connect to the HPC cluster. If
-not, SSH client will display an error message (see Connecting) similar to this:
-Agent admitted failure to sign using the key. 
-Permission denied (publickey,gssapi-keyex,gssapi-with-mic).
-This could be fixed using the ssh-add command. You can include the new
-private keys' identities in your keyring with:
-ssh-add
- tip
-    Without extra options ssh-add adds any key located at $HOME/.ssh
-    directory, but you can specify the private key location path as
-    argument, as example: ssh-add /path/to/my/id_rsa.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json
deleted file mode 100644
index 18b3b3675de..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Using-an-SSH-agent-(optional)",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/connecting"
-    },
-    "previous_title": "account_paragraph_6",
-    "next_title": "account_macos_paragraph_7.2",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt
deleted file mode 100644
index c880ee4a228..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-Check that your key is available from the keyring with:
-ssh-add -l
-After these changes the key agent will keep your SSH key to connect to
-the clusters as usual.
- tip
-    You should execute ssh-add command again if you generate a new SSH
-    key.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json
deleted file mode 100644
index 072a43cb3e4..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Using-an-SSH-agent-(optional)",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_macos_paragraph_7.1",
-    "next_title": "account_paragraph_8",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt
deleted file mode 100644
index 5a5a52da062..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-Applying for the account
-After you log in using your UGent login and password, you will be asked to
-upload the file that contains your public key, i.e., the file
-"id_rsa.pub" which you have generated earlier. Make sure that your
-public key is actually accepted for upload, because if it is in a wrong
-format, wrong type or too short, then it will be refused.
-This file has been stored in the directory "~/.ssh/".
- tip
-    As ".ssh" is an invisible directory, the Finder will not show it by
-    default. The easiest way to access the folder, is by pressing ++cmd+shift+g++ (or ++cmd+shift+"."++),
-    which will allow you to enter the name of a directory, which you would
-    like to open in Finder. Here, type "~/.ssh" and press enter.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json
deleted file mode 100644
index 86c8c2048bf..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Applying-for-the-account",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 2,
-    "directory": "account",
-    "parent_title": "account",
-    "previous_title": "account_paragraph_8",
-    "next_title": "account_paragraph_10",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/account/#applying-for-the-account"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt
deleted file mode 100644
index 1d912924535..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-First Time connection to the HPC infrastructure
-    A locale is a set of parameters that defines the user's language, country and
-    any special variant preferences that the user wants to see in their user
-    interface. Usually a locale identifier consists of at least a language
-    identifier and a region identifier.
-     Note
-        If you try to set a non-supported locale, then it will be automatically
-        set to the default. Currently the default is en_US.UFT-8 or en_US,
-        depending on whether your originally (non-supported) locale was UTF-8 or not.
-    Open the .bashrc on your local machine with your favourite editor and
-    add the following lines:
-    
-    $ nano ~/.bashrc
-    ...
-    export LANGUAGE="en_US.UTF-8"
-    export LC_ALL="en_US.UTF-8"
-    export LC_CTYPE="en_US.UTF-8"
-    export LANG="en_US.UTF-8"
-    ...
-    
-     tip "tip: vi"
-        To start entering text in vi: move to the place you want to start
-        entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq"
-        To exit vi without saving your changes, enter ""ESC":q!"
-    
-    
-    or alternatively (if you are not comfortable with the Linux editors),
-    again on your local machine:
-    
-    echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile
-    echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile
-    echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile
-    echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile
-    
-    You can now log out, open a new terminal/shell on your local machine and
-    reconnect to the login node, and you should not get these warnings anymore.
-    
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json
deleted file mode 100644
index 323292b910e..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 2,
-    "directory": "connecting",
-    "parent_title": "Connecting-to-the-HPC-infrastructure",
-    "previous_title": "connecting_paragraph_10",
-    "next_title": "connecting_paragraph_12",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt
deleted file mode 100644
index d872c89a0f8..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Transfer Files to/from the HPC
-Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back.
-The preferred way to transfer files is by using an scp or sftp via the
-secure OpenSSH protocol.  ships with an implementation of OpenSSH, so you
-don't need to install any third-party software to use it. Just open a
-terminal window and jump in!
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json
deleted file mode 100644
index 8a420f36c2b..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Transfer-Files-tofrom-the-HPC",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 2,
-    "directory": "connecting",
-    "parent_title": "Connecting-to-the-HPC-infrastructure",
-    "previous_title": "connecting_paragraph_11",
-    "next_title": "connecting_paragraph_13",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#transfer-files-tofrom-the-hpc"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt
deleted file mode 100644
index 8d0031fcca9..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-Transfer Files tofrom the HPC
-Using scp
-Secure copy or SCP is a tool (command) for securely transferring files between a local
-host (= your computer) and a remote host (the HPC). It is based on the
-Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e.,
-copy) command, but can copy files to or from remote machines.
-It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if
-you have symlinks to them in your home directory. See 
-the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux
- for how to do this.
-Open an additional terminal window and check that you're working on your
-local machine.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json
deleted file mode 100644
index 79157005600..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Using-scp",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch"
-    },
-    "previous_title": "connecting_paragraph_12",
-    "next_title": "connecting_macos_paragraph_13.2",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt
deleted file mode 100644
index f1da0677a67..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-$ hostname
-<local-machine-name>
-If you're still using the terminal that is connected to the HPC, close the
-connection by typing "exit" in the terminal window.
-For example, we will copy the (local) file "localfile.txt" to your
-home directory on the HPC cluster. We first generate a small dummy
-"localfile.txt", which contains the word "Hello". Use your own VSC
-account, which is something like "vsc40000". Don't forget the colon (:) at the
-end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your
-local filesystem. You can even specify where to save the file on the
-remote filesystem by putting a path after the colon.
-$ echo "Hello" > localfile.txt
-$ ls -l 
-...
--rw-r--r-- 1 user  staff   6 Sep 18 09:37 localfile.txt
-$ scp localfile.txt vsc40000@login.hpc.ugent.be:
-localfile.txt     100%   6     0.0KB/s     00:00
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json
deleted file mode 100644
index dc57de365bf..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Using-scp",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_macos_paragraph_13.1",
-    "next_title": "connecting_macos_paragraph_13.3",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt
deleted file mode 100644
index 9585900e356..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Connect to the HPC via another terminal, print the working directory (to
-make sure you're in the home directory) and check whether the file has
-arrived:
-$ pwd
-/user/home/gent/vsc400/vsc40000
-$ ls -l 
-total 1536
-drwxrwxr-x 2
-drwxrwxr-x 2
-drwxrwxr-x 10
--rw-r--r-- 1
-$ cat localfile.txt
-Hello
-The scp command can also be used to copy files from the cluster to your
-local machine. Let us copy the remote file "intro-HPC--Gent.pdf" from your "docs"
-subdirectory on the cluster to your local computer.
-First, we will confirm that the file is indeed in the "docs"
-subdirectory. In the terminal on the login node, enter:
-$ cd ~/docs
-$ ls -l
-total 1536
--rw-r--r-- 1 vsc40000 Sep 11 09:53 intro-HPC--Gent.pdf
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json
deleted file mode 100644
index 5a4623c650d..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Using-scp",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_macos_paragraph_13.2",
-    "next_title": "connecting_macos_paragraph_13.4",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt
deleted file mode 100644
index d09b69552ef..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Now we will copy the file to the local machine. On the terminal on your
-own local computer, enter:
-$ scp vsc40000@login.hpc.ugent.be:./docs/intro-HPC--Gent.pdf .
-intro-HPC--Gent.pdf 100% 725KB 724.6KB/s 00:01
-$ ls -l
-total 899
--rw-r--r-- 1 user staff 741995 Sep 18 09:53
--rw-r--r-- 1 user staff      6 Sep 18 09:37 localfile.txt
-The file has been copied from the HPC to your local computer.
-It's also possible to copy entire directories (and their contents) with
-the -r flag. For example, if we want to copy the local directory
-dataset to $VSC_SCRATCH, we can use the following command (assuming
-you've created the scratch symlink):
-scp -r dataset vsc40000@login.hpc.ugent.be:scratch
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json
deleted file mode 100644
index 54b3fe19d58..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Using-scp",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_macos_paragraph_13.3",
-    "next_title": "connecting_macos_paragraph_13.5",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt
deleted file mode 100644
index 532d57bb4a5..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-If you don't use the -r option to copy a directory, you will run into
-the following error:
-$ scp dataset vsc40000@login.hpc.ugent.be:scratch
-dataset: not a regular file
-Using sftp
-The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file
-transfer and file management functionalities over any reliable data
-stream. It was designed as an extension of the Secure Shell protocol
-(SSH) version 2.0. This protocol assumes that it is run over a secure
-channel, such as SSH, that the server has already authenticated the
-client, and that the identity of the client user is available to the
-protocol.
-The sftp is an equivalent of the ftp command, with the difference that
-it uses the secure ssh protocol to connect to the clusters.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json
deleted file mode 100644
index 0b9ba08e3b1..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Using-sftp",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_macos_paragraph_13.4",
-    "next_title": "connecting_macos_paragraph_13.6",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt
deleted file mode 100644
index 1ef13b80c6f..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-One easy way of starting a sftp session is
-sftp vsc40000@login.hpc.ugent.be
-Typical and popular commands inside an sftp session are:
-|                       |                                                                                      |
-|:--------------------------|:-------------------------------------------------------------------------------------|
-| cd ~/exmples/fibo     | Move to the examples/fibo subdirectory on the (i.e., the HPC remote machine)   |
-| ls                    | Get a list of the files in the current directory on the HPC.                   |
-| get fibo.py           | Copy the file "fibo.py" from the HPC                                           |
-| get tutorial/HPC.pdf  | Copy the file "HPC.pdf" from the HPC, which is in the "tutorial" subdirectory. |
-| lcd test              | Move to the "test" subdirectory on your local machine.                               |
-| lcd ..                | Move up one level in the local directory.                                            |
-| lls                   | Get local directory listing.                                                         |
-| put test.py           | Copy the local file test.py to the HPC.                                        |
-| put test1.py test2.py | Copy the local file test1.py to the and rename it to test2.py.                       |
-| bye                   | Quit the sftp session                                                                |
-| **mget *.cc**             | Copy all the remote files with extension ".cc" to the local directory.               |
-| **mput *.h**              | Copy all the local files with extension ".h" to the HPC.                       |
-|                       |                                                                                      |
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json
deleted file mode 100644
index 9b08fbde549..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Using-sftp",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/",
-        "1": "https://docs.hpc.ugent.be/"
-    },
-    "previous_title": "connecting_macos_paragraph_13.5",
-    "next_title": "connecting_macos_paragraph_13.7",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt
deleted file mode 100644
index 20a4acb40a8..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Transfer Files tofrom the HPC
-Using a GUI (Cyberduck)
-Cyberduck is a graphical alternative to the scp command. It can be
-installed from <https://cyberduck.io>.
-This is the one-time setup you will need to do before connecting:
-1.  After starting Cyberduck, the Bookmark tab will show up. To add a
-    new bookmark, click on the "+" sign on the bottom left of the
-    window. A new window will open.
-2.  In the drop-down menu on top, select "SFTP (SSH File Transfer Protocol)".
-3.  In the "Server" field, type in login.hpc.ugent.be. In the "Username" field, type in
-    your VSC account id (this looks like vsc40000).
-4.  Select the location of your SSH private key in the "SSH Private Key" field.
-5.  Finally, type in a name for the bookmark in the "Nickname" field and
-    close the window by pressing on the red circle in the top left
-    corner of the window.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json
deleted file mode 100644
index 694b7682aa9..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Using-a-GUI-(Cyberduck)",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_paragraph_13",
-    "next_title": "connecting_macos_paragraph_14.2",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt
deleted file mode 100644
index 1d20edf411f..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-To open the connection, click on the "Bookmarks" icon (which
-resembles an open book) and double-click on the bookmark you just
-created.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json
deleted file mode 100644
index e32b1ab4c58..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Using-a-GUI-(Cyberduck)",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "Transfer-Files-tofrom-the-HPC",
-    "previous_title": "connecting_macos_paragraph_14.1",
-    "next_title": "connecting_paragraph_15",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt
deleted file mode 100644
index 1e22cfc8b1f..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-First Time connection to the HPC infrastructure
-Connect
-Open up a terminal and enter the following command to connect to the HPC.
-You can open a terminal by navigation to Applications and then Utilities in the finder and open Terminal.app, or enter Terminal in Spotlight Search.
-ssh vsc40000@login.hpc.ugent.be
-Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login
-node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command.
-The first time you make a connection to the login node, you will be
-asked to verify the authenticity of the login node. Please check
-Warning message when first connecting to new host on how to do this.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json
deleted file mode 100644
index f928fbfcdd6..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Connect",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host"
-    },
-    "previous_title": "connecting_paragraph_4",
-    "next_title": "connecting_macos_paragraph_5.2",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#connect"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt
deleted file mode 100644
index f3f5ac6e775..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-A possible error message you can get if you previously saved your
-private key somewhere else than the default location
-($HOME/.ssh/id_rsa):
-Permission denied (publickey,gssapi-keyex,gssapi-with-mic).
-In this case, use the -i option for the ssh command to specify the
-location of your private key. For example:
-ssh -i /home/example/my_keys
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json
deleted file mode 100644
index 047d5863361..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Connect",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
-    "previous_title": "connecting_macos_paragraph_5.1",
-    "next_title": "connecting_paragraph_6",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#connect"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt
deleted file mode 100644
index ca00a8a0f65..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-Adding multiple SSH public keys (optional)
-In case you are connecting from different computers to the login nodes,
-it is advised to use separate SSH public keys to do so. You should
-follow these steps.
-1.  Create a new public/private SSH key pair from Putty. Repeat the
-    process described in
-    section Generate a public/private key pair.
-2.  Go to <https://account.vscentrum.be/django/account/edit>
-3.  Upload the new SSH public key using the Add public key section. Make sure that your
-    public key is actually saved, because a public key will be refused
-    if it is too short, wrong type, or in a wrong format.
-4.  (optional) If you lost your key, you can delete the old key on the
-    same page. You should keep at least one valid public SSH key in your
-    account.
-5.  Take into account that it will take some time before the new SSH
-    public key is active in your account on the system; waiting for
-    15-30 minutes should be sufficient.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json
deleted file mode 100644
index 4614c053f2c..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Adding-multiple-SSH-public-keys-(optional)",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Applying-for-the-account",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair"
-    },
-    "previous_title": "account_paragraph_10",
-    "next_title": "account_paragraph_12",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#adding-multiple-ssh-public-keys-optional"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt
deleted file mode 100644
index 93ca7ac9da5..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-How do SSH keys work
-A typical Windows environment does not come with pre-installed software
-to connect and run command-line executables on a HPC. Some tools need to be
-installed on your Windows machine first, before we can start the actual
-work.
-Get PuTTY: A free telnet/SSH client
-We recommend to use the PuTTY tools package, which is freely available.
-You do not need to install PuTTY, you can download the PuTTY and
-PuTTYgen executable and run it. This can be useful in situations where
-you do not have the required permissions to install software on the
-computer you are using. Alternatively, an installation package is also
-available.
-You can download PuTTY from the official address:
-<https://www.chiark.greenend.org.uk/~sgtatham/putty/latest.html>. You
-probably want the 64-bits version. If you can install software on your
-computer, you can use the "Package files", if not, you can download and
-use putty.exe and puttygen.exe in the "Alternative binary files"
-section.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
deleted file mode 100644
index e0024f40d55..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Get-PuTTY-A-free-telnetSSH-client",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_paragraph_3",
-    "next_title": "account_windows_paragraph_4.2",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#get-putty-a-free-telnetssh-client"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt
deleted file mode 100644
index cebd1da3baf..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-The PuTTY package consists of several components, but we'll only use
-two:
-1. PuTTY: the Telnet and SSH client itself (to login, see Open a terminal)
-2.  PuTTYgen: an RSA and DSA key generation utility (to generate a key pair,
-    see Generate a public/private key pair)
-Generating a public/private key pair
-Before requesting a VSC account, you need to generate a pair of ssh
-keys. You need 2 keys, a public and a private key. You can visualise the
-public key as a lock to which only you have the key (your private key).
-You can send a copy of your lock to anyone without any problems, because
-only you can open it, as long as you keep your private key secure. To
-generate a public/private key pair, you can use the PuTTYgen key
-generator.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json
deleted file mode 100644
index 534ebda0a1c..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Generating-a-publicprivate-key-pair",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/connecting/#open-a-terminal",
-        "1": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair"
-    },
-    "previous_title": "account_windows_paragraph_4.1",
-    "next_title": "account_windows_paragraph_4.3",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt
deleted file mode 100644
index 6e65300562d..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Start PuTTYgen.exe it and follow these steps:
-1.  In "Parameters" (at the bottom of the window), choose "RSA" and set the number of
-    bits in the key to 4096.
-2.  Click on "Generate". To generate the key, you must move the mouse cursor over
-    the PuTTYgen window (this generates some random data that PuTTYgen
-    uses to generate the key pair). Once the key pair is generated, your
-    public key is shown in the field "Public key for pasting into OpenSSH authorized_keys file".
-3.  Next, it is advised to fill in the "Key comment" field to make it easier
-    identifiable afterwards.
-4.  Next, you should specify a passphrase in the "Key passphrase" field and retype it in
-    the "Confirm passphrase" field. Remember, the passphrase protects the private key against
-    unauthorised use, so it is best to choose one that is not too easy
-    to guess but that you can still remember. Using a passphrase is not
-    required, but we recommend you to use a good passphrase unless you
-    are certain that your computer's hard disk is encrypted with a
-    decent password. (If you are not sure your disk is encrypted, it
-    probably isn't.)
-5.  Save both the public and private keys in a folder on your personal
-    computer (We recommend to create and put them in the folder
-    "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh") with the
-    buttons "Save public key" and "Save private key". We recommend using the name "id_rsa.pub" for the public key, and
-    "id_rsa.ppk" for the private key.
-6.  Finally, save an "OpenSSH" version of your private key (in
-    particular for later "X2Go" usage, see x2go) by entering the
-    "Conversions" menu and selecting "Export OpenSSH key" (do not select the
-    "force new file format" variant). Save the file in the same location
-    as in the previous step with filename "id_rsa". (If there is no
-    "Conversions" menu, you must update your "puttygen" version. If you
-    want to do this conversion afterwards, you can start with loading an
-    existing "id_rsa.ppk" and only do this conversions export.)
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json
deleted file mode 100644
index 4555638639d..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Generating-a-publicprivate-key-pair",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/"
-    },
-    "previous_title": "account_windows_paragraph_4.2",
-    "next_title": "account_windows_paragraph_4.4",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt
deleted file mode 100644
index d0425d6738f..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-If you use another program to generate a key pair, please remember that
-they need to be in the OpenSSH format to access the HPC clusters.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json
deleted file mode 100644
index ebd55060657..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Generating-a-publicprivate-key-pair",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_windows_paragraph_4.3",
-    "next_title": "account_paragraph_5",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt
deleted file mode 100644
index b8dba743c0a..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-Using an SSH agent (optional)
-It is possible to setup a SSH agent in Windows. This is an optional
-configuration to help you to keep all your SSH keys (if you have
-several) stored in the same key ring to avoid to type the SSH key
-password each time. The SSH agent is also necessary to enable SSH hops
-with key forwarding from Windows.
-Pageant is the SSH authentication agent used in windows. This agent should be
-available from the PuTTY installation package
-<https://www.chiark.greenend.org.uk/~sgtatham/putty/latest.html> or as
-stand alone binary package.
-After the installation just start the Pageant application in Windows,
-this will start the agent in background. The agent icon will be visible
-from the Windows panel.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json
deleted file mode 100644
index 5fd697066b6..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Using-an-SSH-agent-(optional)",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_paragraph_5",
-    "next_title": "account_windows_paragraph_6.2",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt
deleted file mode 100644
index 62ac04dd9aa..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-At this point the agent does not contain any private key. You should
-include the private key(s) generated in the previous section Generating a public/private key pair.
-1.  Click on "Add key"
-2.  Select the private key file generated in Generating a public/private key pair ("id_rsa.ppk" by default).
-3.  Enter the same SSH key password used to generate the key. After this
-    step the new key will be included in Pageant to manage the SSH
-    connections.
-4.  You can see the SSH key(s) available in the key ring just clicking
-    on "View Keys".
-5.  You can change PuTTY setup to use the SSH agent. Open PuTTY and check
-    Connection > SSH > Auth > Allow agent forwarding.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json
deleted file mode 100644
index 11c69338029..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Using-an-SSH-agent-(optional)",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair",
-        "1": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair"
-    },
-    "previous_title": "account_windows_paragraph_6.1",
-    "next_title": "account_windows_paragraph_6.3",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt
deleted file mode 100644
index 17c94975dec..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-Now you can connect to the login nodes as usual. The SSH agent will know
-which SSH key should be used and you do not have to type the SSH
-passwords each time, this task is done by Pageant agent automatically.
-It is also possible to use WinSCP with Pageant, see
-<https://winscp.net/eng/docs/ui_pageant> for more details.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json
deleted file mode 100644
index e33d002d248..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Using-an-SSH-agent-(optional)",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 3,
-    "directory": "account",
-    "parent_title": "Getting-ready-to-request-an-account",
-    "previous_title": "account_windows_paragraph_6.2",
-    "next_title": "account_paragraph_7",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt
deleted file mode 100644
index 9fd23612756..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-Applying for the account
-After you log in using your UGent login and password, you will be asked to
-upload the file that contains your public key, i.e., the file
-"id_rsa.pub" which you have generated earlier. Make sure that your
-public key is actually accepted for upload, because if it is in a wrong
-format, wrong type or too short, then it will be refused.
-This file should have been stored in the directory
-"C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh"
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json
deleted file mode 100644
index 87cda41283f..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "account",
-    "subtitle": "Applying-for-the-account",
-    "source_file": "../../mkdocs/docs/HPC/account.md",
-    "title_depth": 2,
-    "directory": "account",
-    "parent_title": "account",
-    "previous_title": "account_paragraph_8",
-    "next_title": "account_paragraph_10",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/account/#applying-for-the-account"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt
deleted file mode 100644
index 5aa8ca03374..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-First Time connection to the HPC infrastructure
-    A locale is a set of parameters that defines the user's language, country and
-    any special variant preferences that the user wants to see in their user
-    interface. Usually a locale identifier consists of at least a language
-    identifier and a region identifier.
-     Note
-        If you try to set a non-supported locale, then it will be automatically
-        set to the default. Currently the default is en_US.UFT-8 or en_US,
-        depending on whether your originally (non-supported) locale was UTF-8 or not.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json
deleted file mode 100644
index d4b02dbc9fb..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "First-Time-connection-to-the-HPC-infrastructure",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 2,
-    "directory": "connecting",
-    "parent_title": "Connecting-to-the-HPC-infrastructure",
-    "previous_title": "connecting_paragraph_10",
-    "next_title": "connecting_paragraph_12",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#first-time-connection-to-the-hpc-infrastructure"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt
deleted file mode 100644
index 67e5e454852..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Transfer Files to/from the HPC
-Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back.
-WinSCP
-To transfer files to and from the cluster, we recommend the use of
-WinSCP, a graphical file management tool which can transfer files using
-secure protocols such as SFTP and SCP. WinSCP is freely available from
-<http://www.winscp.net>.
-To transfer your files using WinSCP,
-1.  Open the program
-2.  The "Login" menu is shown automatically (if it is closed, click "New Session" to open it again). Fill in the necessary fields under "Session"
-    1.  Click "New Site".
-    2.  Enter "login.hpc.ugent.be" in the "Host name" field.
-    3.  Enter your "vsc-account" in the "User name" field.
-    4.  Select "SCP" as the "file" protocol.
-    5.  Note that the password field remains empty.
-    
-    6.  Click "Advanced...".
-    7.  Click "SSH > Authentication".
-    8.  Select your private key in the field "Private key file".
-3.  Press the "Save" button, to save the session under "Session > Sites" for future access.
-4.  Finally, when clicking on "Login", you will be asked for your key passphrase.
-    
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json
deleted file mode 100644
index a4bbaee0f59..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "WinSCP",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
-    "previous_title": "connecting_paragraph_11",
-    "next_title": "connecting_windows_paragraph_12.2",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt
deleted file mode 100644
index 82c71ac4129..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-The first time you make a connection to the login node, a Security
-Alert will appear and you will be asked to verify the authenticity of the
-login node.
-Make sure the fingerprint in the alert matches one of the following:
-- ssh-rsa 2048 10:2f:31:21:04:75:cb:ed:67:e0:d5:0c:a1:5a:f4:78
-- ssh-rsa 2048 SHA256:W8Wz0/FkkCR2ulN7+w8tNI9M0viRgFr2YlHrhKD2Dd0
-- ssh-ed25519 255 19:28:76:94:52:9d:ff:7d:fb:8b:27:b6:d7:69:42:eb
-- ssh-ed25519 256 SHA256:8AJg3lPN27y6i+um7rFx3xoy42U8ZgqNe4LsEycHILA
-- ssh-ecdsa 256 e6:d2:9c:d8:e7:59:45:03:4a:1f:dc:96:62:29:9c:5f
-- ssh-ecdsa 256 SHA256:C8TVx0w8UjGgCQfCmEUaOPxJGNMqv2PXLyBNODe5eOQ
-If it does, press Yes, if it doesn't, please contact hpc@ugent.be.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json
deleted file mode 100644
index 80a8ef763a1..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "WinSCP",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
-    "previous_title": "connecting_windows_paragraph_12.1",
-    "next_title": "connecting_windows_paragraph_12.3",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt
deleted file mode 100644
index c0ffe6b4602..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Note: it is possible that the ssh-ed25519 fingerprint starts with ssh-ed25519 255
-rather than ssh-ed25519 256 (or vice versa), depending on the PuTTY version you are using.
-It is safe to ignore this 255 versus 256 difference, but the part after should be
-identical.
-Now, try out whether you can transfer an arbitrary file from your local
-machine to the HPC and back.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json
deleted file mode 100644
index 07760730d56..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "WinSCP",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
-    "previous_title": "connecting_windows_paragraph_12.2",
-    "next_title": "connecting_paragraph_13",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt
deleted file mode 100644
index e45f4e63b85..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-First Time connection to the HPC infrastructure
-Open a Terminal
-You've generated a public/private key pair with PuTTYgen and have an
-approved account on the VSC clusters. The next step is to setup the
-connection to (one of) the HPC.
-In the screenshots, we show the setup for user
-"vsc20167"
-to the HPC cluster via the login node "login.hpc.ugent.be".
-1.  Start the PuTTY executable putty.exe in your directory
-    C:\Program Files (x86)\PuTTY and the configuration screen will pop
-    up. As you will often use the PuTTY tool, we recommend adding a
-    shortcut on your desktop.
-2.  Within the category <Session\>, in the field <Host Name\>, enter the name of the
-    login node of the cluster (i.e., "login.hpc.ugent.be") you want to connect to.
-    
-3.  In the category "Connection > Data", in the field "Auto-login username", put in <vsc40000\> , which is your VSC
-    username that you have received by e-mail after your request was
-    approved.
-    
-4.  In the category "Connection > SSH > Auth", in the field "Private key file for authentication" click on "Browse" and select the private key
-    (i.e., "id_rsa.ppk") that you generated and saved above.
-5.  In the category "Connection > SSH > X11", click the "Enable X11 Forwarding" checkbox.
-6.  Now go back to <Session\>, and fill in "hpcugent" in the "Saved Sessions" field and press "Save" to
-    store the session information.
-    
-7.  Now pressing "Open", will open a terminal window and asks for you
-    passphrase.
-    
-8.  If this is your first time connecting, you will be asked to verify
-    the authenticity of the login node. Please see
-    section Warning message when first connecting to new host
-    on how to do this.
-9.  After entering your correct passphrase, you will be connected to the
-    login-node of the HPC.
-10. To check you can now "Print the Working Directory" (pwd) and check
-    the name of the computer, where you have logged in (hostname):
-    $ pwd
-    /user/home/gent/vsc400/vsc40000
-    $ hostname -f
-    gligar07.gastly.os
-11. For future PuTTY sessions, just select your saved session (i.e. "hpcugent")
-    from the list, "Load" it and press "Open".
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json
deleted file mode 100644
index d3b7d581c94..00000000000
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "connecting",
-    "subtitle": "Open-a-Terminal",
-    "source_file": "../../mkdocs/docs/HPC/connecting.md",
-    "title_depth": 3,
-    "directory": "connecting",
-    "parent_title": "First-Time-connection-to-the-HPC-infrastructure",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host"
-    },
-    "previous_title": "connecting_paragraph_3",
-    "next_title": "connecting_paragraph_5",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal"
-}
\ No newline at end of file

From 1ef1f10e6b05839f604fe65e2370599e580c2382 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 30 Aug 2024 13:26:47 +0200
Subject: [PATCH 141/152] Changed paragraphs to decide length based on tokens
 instead of characters

---
 scripts/HPC_chatbot_preprocessor/README.md         |  2 +-
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 12 +++++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
index 96a99498451..6cfd9be8231 100644
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -36,7 +36,7 @@ Including this option will split the source files based on the titles and subtit
 
 #### `pl`/`min_paragraph_length`
 
-This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 683 characters. This options only works if `split_on_titles` is not enabled.
+This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 512 tokens. This options only works if `split_on_titles` is not enabled.
 
 #### `td`/`max_title_depth`
 
diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index cff487f8589..7c3e63c0197 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -6,6 +6,7 @@
 import os
 import re
 import shutil
+import tiktoken
 import yaml
 from itertools import chain, tee, zip_longest
 from pathlib import Path
@@ -615,7 +616,12 @@ def paragraph_long_enough(paragraph, options):
     :return:
     """
     # TODO: change this into something that uses the tokenizer
-    return len(paragraph) >= options[MIN_PARAGRAPH_LENGTH]
+    encoding = tiktoken.get_encoding("cl100k_base")
+    token_amount = len(encoding.encode(paragraph))
+
+    print(token_amount)
+
+    return token_amount >= options[MIN_PARAGRAPH_LENGTH]
 
 
 def write_metadata(main_title, subtitle, links, title_level, directory, source_file):
@@ -1144,7 +1150,7 @@ def main(options):
         main_title = filename[:-3]
 
         # variable that keeps track of the directories that are used to write in at different levels
-        curr_dirs = [filename[:-3] for _ in range(5)]
+        curr_dirs = [filename[:-3] for _ in range(options[MAX_TITLE_DEPTH] + 1)]
 
         ################### actually parse the md file ###################
 
@@ -1212,7 +1218,7 @@ def main(options):
     parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located")
     parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to")
     parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.")
-    parser.add_argument("-pl", "--min_paragraph_length", type=int, default=683, help="Minimum length in characters of a paragraph, only works if split on titles is disabled (default: 683)")
+    parser.add_argument("-pl", "--min_paragraph_length", type=int, default=512, help="Minimum length in characters of a paragraph, only works if split on titles is disabled (default: 683)")
     parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)")
     parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts")
     parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is enabled")

From 621c0a3f083966f2aaa097516767fdf2d4fdd559 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 30 Aug 2024 13:27:57 +0200
Subject: [PATCH 142/152] Changed paragraphs to decide length based on tokens
 instead of characters

---
 scripts/HPC_chatbot_preprocessor/requirements.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt
index 4d27d462460..37137582aad 100644
--- a/scripts/HPC_chatbot_preprocessor/requirements.txt
+++ b/scripts/HPC_chatbot_preprocessor/requirements.txt
@@ -1,2 +1,4 @@
 PyYAML==6.0.2
-Jinja2==3.1.4
\ No newline at end of file
+Jinja2==3.1.4
+tiktoken~=0.7.0
+pathlib~=1.0.1
\ No newline at end of file

From adf364d1f897e433fbc2f0fcc80b8fdeb4f22a43 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 30 Aug 2024 13:28:45 +0200
Subject: [PATCH 143/152] Changed paragraphs to decide length based on tokens
 instead of characters

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 7c3e63c0197..24e0b287a0a 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -615,12 +615,9 @@ def paragraph_long_enough(paragraph, options):
     :param options: dictionary containing the options given by the user
     :return:
     """
-    # TODO: change this into something that uses the tokenizer
     encoding = tiktoken.get_encoding("cl100k_base")
     token_amount = len(encoding.encode(paragraph))
 
-    print(token_amount)
-
     return token_amount >= options[MIN_PARAGRAPH_LENGTH]
 
 

From 32b8b741c8582a98b122b230742e1be09ba8c698 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 30 Aug 2024 14:45:34 +0200
Subject: [PATCH 144/152] removing unnecessary files

---
 scripts/HPC_chatbot_preprocessor/README.md    |  196 ---
 .../chatbot_parser.py                         | 1236 -----------------
 .../HPC_chatbot_preprocessor/requirements.txt |    4 -
 .../generic/tps1/tps1_paragraph_1.txt         |    6 -
 .../tps1/tps1_paragraph_1_metadata.json       |   15 -
 .../generic/tps1/tps1_paragraph_3.txt         |    3 -
 .../tps1/tps1_paragraph_3_metadata.json       |   12 -
 .../linux/tps1/tps1_linux_paragraph_2.1.txt   |    4 -
 .../tps1_linux_paragraph_2.1_metadata.json    |   15 -
 .../linux/tps1/tps1_linux_paragraph_2.2.txt   |    3 -
 .../tps1_linux_paragraph_2.2_metadata.json    |   12 -
 .../macos/tps1/tps1_macos_paragraph_2.1.txt   |    4 -
 .../tps1_macos_paragraph_2.1_metadata.json    |   15 -
 .../macos/tps1/tps1_macos_paragraph_2.2.txt   |    3 -
 .../tps1_macos_paragraph_2.2_metadata.json    |   12 -
 .../tps1/tps1_windows_paragraph_2.1.txt       |    7 -
 .../tps1_windows_paragraph_2.1_metadata.json  |   15 -
 .../tps1/tps1_windows_paragraph_2.2.txt       |    6 -
 .../tps1_windows_paragraph_2.2_metadata.json  |   12 -
 .../tests/test_files/ftps/tps1.md             |   43 -
 .../tts1/Main-title/Subtitle-1/Subtitle-1.txt |    2 -
 .../Subtitle-1/Subtitle-1_metadata.json       |   12 -
 .../Main-title/Subtitle-5-g/Subtitle-5-g.txt  |    1 -
 .../Subtitle-5-g/Subtitle-5-g_metadata.json   |   12 -
 .../Main-title/Subtitle-2-g/Subtitle-2-g.txt  |    4 -
 .../Subtitle-2-g/Subtitle-2-g_metadata.json   |   15 -
 .../Subtitle-4-l&m/Subtitle-4-l&m.txt         |    3 -
 .../Subtitle-4-l&m_metadata.json              |   15 -
 .../Main-title/Subtitle-2-g/Subtitle-2-g.txt  |    4 -
 .../Subtitle-2-g/Subtitle-2-g_metadata.json   |   15 -
 .../Subtitle-4-l&m/Subtitle-4-l&m.txt         |    3 -
 .../Subtitle-4-l&m_metadata.json              |   15 -
 .../Main-title/Subtitle-2-g/Subtitle-2-g.txt  |    4 -
 .../Subtitle-2-g/Subtitle-2-g_metadata.json   |   15 -
 .../Subtitle-3-w/Subtitle-3-w.txt             |    3 -
 .../Subtitle-3-w/Subtitle-3-w_metadata.json   |   15 -
 .../tests/test_files/ftts/tts1.md             |   31 -
 .../if_mangler_1_input.md                     |    4 -
 .../if_mangler_1_output.md                    |    4 -
 .../if_mangler_2_input.md                     |    7 -
 .../if_mangler_2_output.md                    |    7 -
 .../if_mangler_3_input.md                     |    6 -
 .../if_mangler_3_output.md                    |    6 -
 .../if_mangler_4_input.md                     |    4 -
 .../if_mangler_4_output.md                    |    4 -
 .../if_mangler_5_input.md                     |   11 -
 .../if_mangler_5_output.md                    |   11 -
 .../if_mangler_6_input.md                     |    8 -
 .../if_mangler_6_output.md                    |    8 -
 .../if_mangler_7_input.md                     |    9 -
 .../if_mangler_7_output.md                    |    9 -
 .../if_mangler_test_files/if_mangler_input.md |   55 -
 .../if_mangler_output.md                      |   55 -
 .../tests/test_files/list_file/list_test.md   |   15 -
 .../tests/test_full_script.py                 |   68 -
 .../tests/test_if_mangler.py                  |   32 -
 .../tests/test_insert_links.py                |   31 -
 .../tests/test_links.py                       |   69 -
 .../tests/test_lists.py                       |   27 -
 .../tests/test_make_valid_title.py            |   14 -
 .../tests/test_replace_markdown_markers.py    |   46 -
 .../tests/test_write_metadata.py              |   15 -
 62 files changed, 2317 deletions(-)
 delete mode 100644 scripts/HPC_chatbot_preprocessor/README.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/chatbot_parser.py
 delete mode 100644 scripts/HPC_chatbot_preprocessor/requirements.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_full_script.py
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_links.py
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_lists.py
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py
 delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
deleted file mode 100644
index 6cfd9be8231..00000000000
--- a/scripts/HPC_chatbot_preprocessor/README.md
+++ /dev/null
@@ -1,196 +0,0 @@
-# Chatbot parser
-
-`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory as input for a chatbot.
-
-## Usage
-
-The script can be ran in a shell environment with the following command:
-
-```shell
-python chatbot_parser.py
-```
-
-This command has the following possible options:
-
-```shell
-chatbot_parser.py [-h] -src SOURCE -dst DESTINATION [-st] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd]
-```
-
-### Options
-
-#### `h`/`help`
-
-Display the help message
-
-#### `src`/`source`
-
-This is a required option that specifies the source directory of the input files for the script. This location is also used to look for jinja templates when using jinja to parse the source files (such as the `macros` directory within `vsc_user_docs/mkdocs/docs/HPC`).
-
-#### `dst`/`destination`
-
-This is a required option that specifies where the output of the script should be written. The script also generates extra intermediate subdirectories, so subdirectories with the following names shouldn't be present in the destination directory: `parsed_mds`, `copies` and `if_mangled_files`. If any of these pose a problem, the name of the intermediate subdirectory used for the script can be changed in the macros at the top of the script.
-
-#### `st`/`split_on_titles`
-
-Including this option will split the source files based on the titles and subtitles in the markdown text. Not including this option will split the text on paragraphs with a certain minimum length.
-
-#### `pl`/`min_paragraph_length`
-
-This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 512 tokens. This options only works if `split_on_titles` is not enabled.
-
-#### `td`/`max_title_depth`
-
-This option allows the user to configure the maximum "title depth" (the amount of `#` in front) to be used as borders between sections if `split_on_titles` is enabled. The default value is 4.
-
-#### `l`/`links`
-
-Some of the sourcefiles might contain links. Including this option will retain the links in the plaintext. If this option is not included, the links will be dropped from the plaintext.
-
-#### `dd`/`deep_directories`
-
-Including this option will make the script generate a "deep directory" where every title encountered will be made into a subdirectory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). This option only works if `split_on_titles` is enabled.
-
-## Generated file structure
-
-The generated directory structure is written as a subdirectory of `parsed_mds`. In `parsed_mds`, two subdirectories can be found: 
-
-- `generic` contains the parts of the markdown sources that were non-OS-specific
-- `os_specific` contains the parts of the markdown sources that were OS-specific
-
-Within `os_specific` a further distinction is made for each of the three possible operating systems included in the documentation.
-
-Both the generic and each of the three os-specific directories then contain a directory for each source file. 
-
-If the option `deep_directories` is not enabled, all paragraphs of the source file and their corresponding metadata will be saved in this directory. The (processed) plaintext of the paragraph is written to a `.txt` file and the metadata is written to a `.json` file.
-
-If the option `deep_directories` is enabled, the directory of each source file will contain a subdirectory structure corresponding to the structure of the subtitles at different levels in the source file. Each subtitle in the source file corresponds to a directory nested in the directory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). 
-
-Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and a `.json` file with the metadata of that section.
-
-## Requirements
-
-- The required Python packages are listed in `requirements.txt`
-
-## Restrictions on source-files
-
-Due to the nature of the script, some restrictions should be taken into account about the markdown files it can use as input.
-
-### Nested if structures
-
-The script uses the if-structures in the source-files to split the documentation into general documentation and os-specific documentation. As such it needs to keep track of which types of if-structures (os-related/non-os-related) it is reading from. When using certain nested if-structures, this will cause problems. The supported nested if-structures are determined by the macros `NON_OS_IF`, `NON_OS_IF_IN_OS_IF`, `OS_IF` and `OS_IF_IN_OS_IF`. So respectively a non-os-related if-structure, a non-os-related if nested in an os-related one, an os-related if-structure and an os-related if-structure nested in another os-related if-structure. All of these are allowed to be nested in an undetermined amount of non-os-related if-structures, but no non-os-related if structures should be nested in them. It is also not allowed to nest any of the allowed structures in more os-related if-structures. 
-
-#### Examples of valid and invalid if-structures
-
-##### Allowed
-
-###### non-os-related in os-related
-
-This is an example of one of the basic allowed if-structures (`NON_OS_IF_IN_OS_IF`)
-
-```
-if OS == windows:
-  if site == Gent:
-    ...
-  endif
-endif
-```
-
-###### os-related in os-related in non-os-related
-
-This is an example of the basic allowed if-structure `OS_IF_IN_OS_IF` nested in a non-os-specific if.
-
-```
-if site == Gent:
-  if OS == windows:
-    ...
-  else:
-    if OS == Linux:
-      ...
-    endif
-  endif
-endif
-```
-
-##### Not allowed
-
-###### non-os-related in os-related in os-related
-
-This is an example of a non-os-related if-structure nested in one of the basic allowed if-structures (`OS_IF_IN_OS_IF`).
-
-```
-if OS != windows:
-  if OS == Linux:
-    if site == Gent:
-      ...
-    endif
-  endif
-endif
-```
-
-This will result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it.
-
-###### os-related in non-os-related in os-related
-
-This is an example of the basic allowed if-structure `OS_IF` (indirectly) nested in an os-specific if-structure.
-
-```
-if OS != windows:
-  if site == Gent:
-    if OS == Linux:
-      ...
-    endif
-  endif
-endif
-```
-
-This will also result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it.
-
-### Non OS-related if-statements
-
-Due to the way jinja parses the sourcefiles, the script slightly alters non os-specific if-statements as well. It expects if-statements of the following form:
-
-```
-{%- if site == gent %}
-{% if site != (gent or brussel) %}
-```
-
-All spaces and the dash are optional. City names don't need to be fully lowercase since the parser will capitalize them properly anyway.
-
-### html syntax
-
-The input shouldn't contain any html syntax. While some failsafes are in place, the script isn't made with the use case of handling html syntax in mind. 
-
-### Comments
-
-Any comments within the markdown files (for example TODO's) should follow the following syntax:
-
-```
-<!--your comment-->
-```
- and should be limited to one line.
-
-Comments can be written in such a way that the script will keep them as input for the bot. To do that, the marker `INPUT_FOR_BOT` should be put in front of the content of the comment as such.
-
-```
-<!--INPUT_FOR_BOT: your comment for the bot-->
-```
-
-This will be reworked to
- 
-```
-your comment for the bot
-```
-
-in the final output.
-
-### Long filepaths
-
-Due to the nature of this script, it can generate large directories with very long names if `deep_directories` is enabled. Depending on the operating system, this can cause problems with filepaths being to long, resulting in files not being able to open. A possible fix for this is to make sure the filepath to where the script is located is not too long. Another solution is lowering the `max_title_depth` or disabling `deep_directories`.
-
-### Markdown lists
-
-The parser is made in a way to detect lists and not split them in multiple paragraphs. The kinds of lists it can detect is all lists with denominators `-`, `+`, `*` and list indexed with numbers or letters (one letter per list entry). It can handle  list entries being spread out over multiple lines if there is an indentation of at least two spaces. It can also handle multiple paragraph list entries in this way, as long as the indentation stays.
-
-### Links
-
-Part of the metadata of the parser are links. In order for the links to be built up in the right way, links to external sites should always start with either `https://` or `http://`.
diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
deleted file mode 100644
index 24e0b287a0a..00000000000
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ /dev/null
@@ -1,1236 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import copy
-import json
-import os
-import re
-import shutil
-import tiktoken
-import yaml
-from itertools import chain, tee, zip_longest
-from pathlib import Path
-from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template
-
-#################### define macro's ####################
-# options
-SOURCE_DIRECTORY = "SOURCE_DIRECTORY"
-DESTINATION_DIRECTORY = "DESTINATION_DIRECTORY"
-SPLIT_ON_TITLES = "SPLIT_ON_TITLES"
-MIN_PARAGRAPH_LENGTH = "MIN_PARAGRAPH_LENGTH"
-MAX_TITLE_DEPTH = "MAX_TITLE_DEPTH"
-INCLUDE_LINKS_IN_PLAINTEXT = "INCLUDE_LINKS_IN_PLAINTEXT"
-SPLIT_ON_PARAGRAPHS = "SPLIT_ON_PARAGRAPHS"
-DEEP_DIRECTORIES = "DEEP_DIRECTORIES"
-VERBOSE = "VERBOSE"
-
-# directories
-PARSED_MDS = "parsed_mds"
-COPIES = "copies"
-IF_MANGLED_FILES = "if_mangled_files"
-LINUX_TUTORIAL = "linux-tutorial"
-RETURN_DIR = ".."
-MKDOCS_DIR = "mkdocs"
-DOCS_DIR = "docs"
-HPC_DIR = "HPC"
-EXTRA_DIR = "extra"
-GENERIC_DIR = "generic"
-OS_SPECIFIC_DIR = "os_specific"
-MACROS = "macros"
-
-# OSes
-LINUX = "linux"
-WINDOWS = "windows"
-MACOS = "macos"
-GENERIC = "generic"
-LINK_OS = {LINUX: "Linux", WINDOWS: "Windows", MACOS: "macOS"}  # OS needs different capitalisation for use in links
-
-# urls
-REPO_URL = 'https://github.com/hpcugent/vsc_user_docs'
-DOCS_URL = "https://docs.hpc.ugent.be"
-
-# OS-related if-states
-ACTIVE = "active"
-INACTIVE = "inactive"
-
-# if mangler states
-NON_OS_IF = 0
-NON_OS_IF_IN_OS_IF = 1
-OS_IF = 2
-OS_IF_IN_OS_IF = 3
-
-# if mangler macros
-IF_MANGLED_PART = "-if-"
-
-# actions
-DONE = "done"
-WRITE_TEXT = "write_text"
-CHECK_EXTRA_MESSAGE = "check_extra_message"
-WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE = "write_text_and_check_extra_message"
-
-# Metadata attributes
-SOURCE_FILE = "source_file"
-MAIN_TITLE = "main_title"
-SUBTITLE = "subtitle"
-TITLE_DEPTH = "title_depth"
-DIRECTORY = "directory"
-LINKS = "links"
-PARENT_TITLE = "parent_title"
-PREVIOUS_SUBTITLE = "previous_title"
-NEXT_SUBTITLE = "next_title"
-METADATA_OS = "OS"
-REFERENCE_LINK = "reference_link"
-
-# if-structure components
-IF = "if"
-ELSE = "else"
-ENDIF = "endif"
-
-# link indicator
-LINK_MARKER = r'§link§link§'
-
-# HTML tags
-HTML_TAGS = ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"]  # make sure these are always lowercase
-
-# regex patterns
-IF_MANGLED_PATTERNS = {
-        IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})',
-        ELSE: r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})',
-        ENDIF: r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})'
-    }
-
-# filenames (and parts of filenames)
-TEMP_JINJA_FILE = "jinja_file.txt"
-_PARAGRAPH_ = "_paragraph_"
-METADATA_EXTENSION = "_metadata"
-
-# Marker for comments for the bot
-INPUT_FOR_BOT = "INPUT_FOR_BOT: "
-
-# Standard strings for verbose output
-LINE = "------------------------------------------------------------------------------------------------------\n"
-
-
-################### define functions ###################
-
-def check_for_title(line, in_code_block, curr_dirs, options):
-    """
-    function that checks for titles in the current line. Used by split_text to split the text among the subtitles
-
-    :param line: the current line to be checked for a title
-    :param in_code_block: boolean indicating whether the current line is part of a codeblock to be sure comments aren't counted as titles
-    :param curr_dirs: the current working directories for each level of subtitle, to be updated when a new title is found
-    :param options: dictionary containing the options given by the user
-    :return title_length: The amount of hashtags in front of the title on the current line
-    """
-    # detect titles
-    match = re.match(r'^#+ ', line)
-    if match and len(match.group(0)) <= options[MAX_TITLE_DEPTH] + 1 and not in_code_block:
-        title_length = len(match.group(0)) - 1
-        if options[DEEP_DIRECTORIES]:
-            curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 1:-1].replace(' ', '-')))
-
-            # update the higher order current directories
-            for i in range(title_length + 1, options[MAX_TITLE_DEPTH] + 1):
-                curr_dirs[i] = curr_dirs[title_length]
-
-        return title_length
-    else:
-        return 0
-
-
-def make_valid_link(link, main_title, is_linux_tutorial):
-    """
-    Function that converts a string to a valid link to be used in the metadata
-
-    :param link: the input string to be turned into a valid link
-    :param main_title: the main title of the file that contains the link
-    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
-    :return link: the valid link
-    """
-
-    # ugly fix for problem with links
-    linux_tutorial_files = ["beyond_the_basics", "common_pitfalls", "getting_started", "hpc_infrastructure", "index", "manipulating_files_and_directories", "navigating", "uploading_files"]
-    if is_linux_tutorial and any([linux_tutorial_files[i] in link for i in range(len(linux_tutorial_files))]):
-        linux_part = LINUX_TUTORIAL + '/'
-    else:
-        linux_part = ""
-
-    if link.startswith('http://') or link.startswith('https://') or link.startswith('mailto:'):
-        pass
-    else:
-        if link.startswith("./"):
-            link = link.replace('./', '')
-        elif link.startswith("../"):
-            link = link.replace('../', '')
-
-        if link.startswith("#"):
-            link = DOCS_URL + '/' + linux_part + main_title + "/" + link
-        elif link.endswith(".md") and ("/" not in link or "." not in link.split("/")[0]):
-            link = DOCS_URL + '/' + linux_part + link.replace(".md", "")
-        elif '.md#' in link:
-            link = DOCS_URL + '/' + linux_part + link.replace(".md", "/")
-        else:
-            link = DOCS_URL + '/' + linux_part + link
-
-    link = link.replace('index/', '').replace('/index', '')
-
-    return link
-
-
-def replace_markdown_markers(curr_line, linklist, in_code_block, main_title, is_linux_tutorial):
-    """
-    function that replaces certain markdown structures with the equivalent used on the website
-
-    :param curr_line: the current line on which markdown structures need to be replaced
-    :param linklist: the list used to store links that need to be printed at the end of the file
-    :param in_code_block: boolean indicating whether the current line is part of a code block
-    :param main_title: the main title of the file that is being processed
-    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
-    :return curr_line: the adapted current line
-    :return linklist: the updated linklist
-    """
-
-    # replace images with an empty line
-    if re.search(r'(?i)!\[image]\(.*?\)', curr_line) or re.search(r'!\[.*?]\(img/.*?\.png\)', curr_line):
-        curr_line = ""
-
-    # replace links with a reference
-    matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line)
-    if matches:
-        for match in matches:
-            curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_MARKER + str(len(linklist)) + LINK_MARKER)
-
-            linklist.append(make_valid_link(match[1], main_title, is_linux_tutorial))
-
-    # codeblock (with ``` -> always stands on a separate line, so line can be dropped)
-    if '```' in curr_line:
-        curr_line = ""
-
-    # structures within <>
-    match = re.findall(r'<(.*?)>', curr_line)
-    if match:
-        for i, content in enumerate(match):
-            html_tags_variations = list(chain.from_iterable([[element, element + "/", "/" + element] for element in HTML_TAGS]))
-            html_tags_style = [element + " style=.*" for element in HTML_TAGS]
-
-            # add references for every link of format <a href=...>
-            if re.search(r'a href=.*', content):
-                link = content[7:]
-                curr_line = re.sub(f'<{content}>', LINK_MARKER + str(len(linklist)) + LINK_MARKER, curr_line)
-                linklist.append(link)
-
-            # drop the syntax words
-            elif content.lower() in html_tags_variations:
-                curr_line = re.sub(f'<{content}>', "", curr_line)
-
-            # drop the version of the HTML_TAGS followed by " style="
-            elif any(re.match(pattern, content) for pattern in html_tags_style):
-                curr_line = re.sub(r'<.*?>', "", curr_line)
-
-            # keep comments for bot
-            elif re.fullmatch(r'!--' + INPUT_FOR_BOT + r'.*?--', content):
-                curr_line = re.sub(r'<!--' + INPUT_FOR_BOT + r'(.*?)-->', lambda m: m.group(1), curr_line)
-
-            # drop comments
-            elif re.fullmatch(r'!--.*?--', content):
-                curr_line = re.sub(r'<.*?>', "", curr_line)
-
-            # drop the <> around links
-            elif re.match(r'http://', content) or re.match(r'https://', content):
-                curr_line = re.sub(r'<' + content + '>', content, curr_line )
-
-            # keep the rest
-            else:
-                pass
-
-    # structures with !!! (info, tips, warnings)
-    if '!!!' in curr_line:
-        curr_line = re.sub(r'!!!', "", curr_line)
-
-    # structures with ??? (collapsable admonitions)
-    if '???' in curr_line:
-        curr_line = re.sub(r'\?\?\?', "", curr_line)
-
-    # get rid of other indicators (`, *, +, _)
-    if not in_code_block:
-
-        backquotes = re.findall(r'`(.*?)`', curr_line)
-        if backquotes:
-            for i, content in enumerate(backquotes):
-                curr_line = curr_line.replace(f"`{content}`", content)
-
-        asterisks = re.findall(r'(?<!\\)(\*+)(.+?)\1', curr_line)
-        if asterisks:
-            for i, content in enumerate(asterisks):
-                curr_line = re.sub(r"(\*+)" + content[1] + r"\1", content[1], curr_line)
-
-        pluses = list(set(re.findall(r'\+\+([^ ]+?)\+\+', curr_line) + re.findall(r'\+\+(".+?")\+\+', curr_line)))
-        if pluses:
-            for i, content in enumerate(pluses):
-                curr_line = re.sub(r"\+\+" + content + r"\+\+", content, curr_line)
-
-        underscores = re.findall(r' (_+)(.+?)\1 ', curr_line)
-        if underscores:
-            for i, content in enumerate(underscores):
-                curr_line = re.sub(r"(_+)" + content[1] + r"\1", content[1], curr_line)
-
-    return curr_line, linklist
-
-
-def split_text(file, main_title, options, is_linux_tutorial, current_paragraph_number=-1, OS=GENERIC):
-    """
-    Function that splits the text into smaller sections and makes them into two dictionaries containing text and metadata
-
-    :param file: the filepath of the file to be split
-    :param main_title: the main title of the file
-    :param options: dictionary containing the options given by the user
-    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
-    :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph on paragraph level
-    :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph on paragraph level
-    :return paragraphs_text: dictionary containing the split sections of text
-    :return paragraphs_metadata: dictionary containing the metadata of each split section of text
-    :return subtitle_order: list containing all encountered subtitles in order of appearance
-    """
-
-    if options[SPLIT_ON_TITLES]:
-        return split_on_titles(file, main_title, options, is_linux_tutorial)
-    elif options[SPLIT_ON_PARAGRAPHS]:
-        return split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_paragraph_number, OS)
-
-
-def split_on_titles(file, main_title, options, is_linux_tutorial):
-    """
-    Function that splits the text into smaller sections based on the subtitle structure and makes them into two dictionaries containing text and metadata
-
-    :param file: the filepath of the file to be split
-    :param main_title: the main title of the file
-    :param options: dictionary containing the options given by the user
-    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
-    :return paragraphs_text: dictionary containing the split sections of text
-    :return paragraphs_metadata: dictionary containing the metadata of each split section of text
-    :return subtitle_order: list containing all encountered subtitles in order of appearance
-    """
-
-    if options[VERBOSE]:
-        print("Splitting on titles\n")
-
-    # start of assuming we haven't encountered a title
-    after_first_title = False
-
-    # start of assuming we are not in a code_block
-    in_code_block = False
-
-    # define initial dictionaries
-    paragraphs_os_free_text = {}
-    paragraphs_os_text = {}
-    paragraphs_metadata = {}
-
-    # variable to keep track of the current paragraph
-    current_paragraph = ""
-
-    # list to keep track of links in the text
-    link_list = []
-
-    # list to keep track of the order of the subtitles
-    subtitle_order = []
-
-    # variable to keep track of how many if-statements deep the current line is
-    in_if_statement = 0
-
-    # variable to indicate that previous section was one with if-statements
-    previous_contained_if = False
-
-    # list to keep track of most recent directories on each title level
-    curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)]
-
-    with open(file, 'r') as readfile:
-
-        for line in readfile:
-
-            # detect if-statements starting or ending on the current line
-            in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len(re.findall(IF_MANGLED_PATTERNS[ENDIF], line))
-
-            # detect codeblocks to make sure titles aren't detected in them
-            if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
-                in_code_block = not in_code_block
-                if options[VERBOSE]:
-                    if in_code_block:
-                        print("Detected start of a codeblock, not registering titles")
-                    else:
-                        print("Detected end of codeblock, registering titles again")
-
-            # only split up if current line is in a fully non-os-specific section
-            if in_if_statement == 0:
-
-                title_level = check_for_title(line, in_code_block, curr_dirs, options)
-
-                # line is a title with a maximum depth of 4
-                if title_level > 0:
-                    if after_first_title:
-
-                        # write text of previous file
-                        if previous_contained_if:
-                            paragraphs_os_text[title] = current_paragraph
-                            if options[VERBOSE]:
-                                print("Saved os-specific chunk with temporary title: " + title + "\n")
-                        else:
-                            paragraphs_os_free_text[title] = current_paragraph
-                            if options[VERBOSE]:
-                                print("Saved generic chunk with title: " + title + "\n")
-
-                        # write metadata of previous file
-                        paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir, options[SOURCE_DIRECTORY] + '/' + main_title + '.md')
-
-                    # make a new title
-                    title = make_valid_title(line[title_level + 1:-1])
-
-                    # create an entry for the file in the paragraphs text dictionary
-                    current_paragraph = ""
-
-                    after_first_title = True
-                    subtitle_order.append(title)
-
-                    # reset link_list
-                    link_list = []
-
-                    previous_contained_if = False
-
-                # line is not a title
-                elif after_first_title:
-                    line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial)
-                    if line != "\n":
-                        current_paragraph += line
-
-                # keep track of title level and directory to write to metadata upon discovering a new subtitle
-                if title_level > 0:
-                    last_title_level = title_level
-                    last_dir = curr_dirs[last_title_level]
-            else:
-                previous_contained_if = True
-                line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial)
-                if line != "\n":
-                    current_paragraph += line
-
-    # write dictionaries for the last file
-    if previous_contained_if:
-        paragraphs_os_text[title] = current_paragraph
-        if options[VERBOSE]:
-            print("Saved os-specific chunk with temporary title: " + title + "\n")
-    else:
-        paragraphs_os_free_text[title] = current_paragraph
-        if options[VERBOSE]:
-            print("Saved generic chunk with title: " + title + "\n")
-    paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, curr_dirs[last_title_level], options[SOURCE_DIRECTORY] + '/' + main_title + '.md')
-
-    return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order
-
-
-def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_paragraph_number=-1, OS=GENERIC):
-    """
-    Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata
-
-    :param file: the filepath of the file to be split
-    :param main_title: the main title of the file
-    :param options: dictionary containing the options given by the user
-    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
-    :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph
-    :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph
-    :return paragraphs_text: dictionary containing the split sections of text
-    :return paragraphs_metadata: dictionary containing the metadata of each split section of text
-    :return subtitle_order: list containing all encountered subtitles in order of appearance
-    """
-
-    if options[VERBOSE]:
-        print("Splitting on paragraphs\n")
-
-    # start of assuming we are not in a code_block
-    in_code_block = False
-
-    # define initial dictionaries
-    paragraphs_os_free_text = {}
-    paragraphs_os_text = {}
-    paragraphs_metadata = {}
-
-    # variable to keep track of the current paragraph
-    current_paragraph = ""
-
-    # list to keep track of links in the text
-    link_list = []
-
-    # list to keep track of the order of the subtitles
-    subtitle_order = []
-
-    # variable to keep track of how many if-statements deep the current line is
-    in_if_statement = 0
-
-    # variable to indicate that previous section was one with if-statements
-    previous_contained_if = False
-
-    # variable to indicate that the previous line was part of a list
-    in_list = False
-
-    # paragraph number to add to title
-    paragraph_number = 1
-
-    # metadata title
-    metadata_title = main_title
-
-    # define metadata data if split occurs on paragraphs and last_title and title_level are known (will be replaced later on in the process)
-    if current_paragraph_number != -1:
-        last_title_level = 4
-        last_dir = "PLACEHOLDER"
-
-    # list to keep track of most recent directories on each title level
-    curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)]
-
-    with open(file, 'r') as readfile:
-
-        # Create two independent iterators from the original file iterator (needed to check for lists)
-        current_line, next_line = tee(readfile)
-
-        # Advance the next_line iterator by one step, so it is always one step ahead
-        next(next_line, None)
-
-        # Process the lines
-        for line, nxt in zip_longest(current_line, next_line, fillvalue=""):
-
-            # detect if-statements starting or ending on the current line
-            in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len(
-                re.findall(IF_MANGLED_PATTERNS[ENDIF], line))
-
-            # detect whether the current line is in a list
-            if re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', line):  # beginning of a list entry
-                in_list = True
-                if options[VERBOSE]:
-                    print("First line of new list entry found, not starting new paragraphs: " + line[:-1])
-            elif re.search(r'^\s{2,}.+$', line) and in_list:  # middle of a list entry
-                pass
-            elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list:  # line(s) between list entries
-                pass
-            elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', nxt):
-                in_list = True
-            elif in_list:
-                if options[VERBOSE]:
-                    print("List ended, starting new paragraphs again")
-                in_list = False
-            else:
-                in_list = False
-
-            # detect codeblocks to make sure titles aren't detected in them
-            if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
-                in_code_block = not in_code_block
-                if options[VERBOSE]:
-                    if in_code_block:
-                        print("Detected start of a codeblock, not starting new paragraphs")
-                    else:
-                        print("Detected end of codeblock, starting new paragraphs again")
-
-            # only split up if current line is in a fully non-os-specific section
-            if in_if_statement == 0:
-
-                title_level = check_for_title(line, in_code_block, curr_dirs, options)
-
-                # check whether a new paragraph should be started
-                if line == "\n" and paragraph_long_enough(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph), options) and not in_code_block and not in_list:
-
-                    # create a title for the previous paragraph
-                    if current_paragraph_number == -1:
-                        paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number)
-                    else:
-                        paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number)
-                    paragraph_number += 1
-
-                    # write text of previous file
-                    if previous_contained_if:
-                        paragraphs_os_text[paragraph_title] = current_paragraph
-                        if options[VERBOSE]:
-                            print("Saved os-specific chunk with temporary title: " + paragraph_title + "\n")
-                    else:
-                        paragraphs_os_free_text[paragraph_title] = current_paragraph
-                        if options[VERBOSE]:
-                            print("Saved generic chunk with title: " + paragraph_title + "\n")
-
-                    # write metadata of previous file
-                    paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir, source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md')
-                    subtitle_order.append(paragraph_title)
-
-                    # reset the current paragraph
-                    current_paragraph = ""
-
-                    # reset link_list
-                    link_list = []
-
-                    previous_contained_if = False
-
-                # line is a title with a maximum depth of 4
-                elif title_level > 0:
-
-                    # make a new title
-                    metadata_title = make_valid_title(line[title_level + 1:-1])
-
-                    line, link_list = replace_markdown_markers(line[title_level + 1:], link_list, in_code_block, main_title, is_linux_tutorial)
-                    current_paragraph += line
-
-                # line is not a title or the beginning of a new paragraph
-                elif line != "\n" or previous_contained_if:
-                    line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial)
-                    current_paragraph += line
-
-                # keep track of title level and directory to write to metadata upon discovering a new subtitle
-                if title_level > 0:
-                    last_title_level = title_level
-                    last_dir = curr_dirs[last_title_level]
-            else:
-                previous_contained_if = True
-                line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial)
-                current_paragraph += line
-
-    # create a title for the last paragraph
-    if current_paragraph_number == -1:
-        paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number)
-    else:
-        paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number)
-
-    # write dictionaries for the last file
-    if previous_contained_if:
-        paragraphs_os_text[paragraph_title] = current_paragraph
-        if options[VERBOSE]:
-            print("Saved os-specific chunk with temporary title: " + paragraph_title + "\n")
-    else:
-        paragraphs_os_free_text[paragraph_title] = current_paragraph
-        if options[VERBOSE]:
-            print("Saved generic chunk with title: " + paragraph_title + "\n")
-    paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level], source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md')
-    subtitle_order.append(paragraph_title)
-
-    return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order
-
-
-def paragraph_long_enough(paragraph, options):
-    """
-    Function that checks if the paragraph is long enough to be split of
-
-    :param paragraph: current paragraph
-    :param options: dictionary containing the options given by the user
-    :return:
-    """
-    encoding = tiktoken.get_encoding("cl100k_base")
-    token_amount = len(encoding.encode(paragraph))
-
-    return token_amount >= options[MIN_PARAGRAPH_LENGTH]
-
-
-def write_metadata(main_title, subtitle, links, title_level, directory, source_file):
-    """
-    Function that writes metadata about a text section to a dictionary
-
-    :param main_title: The main title of the file containing the section
-    :param subtitle: the title of the section
-    :param links: a list of links contained within the section
-    :param title_level: the depth of the title of the section
-    :param directory: the directory where the section will eventually be written (can either be generic or os-specific)
-    :param source_file: the source file that the section originates from
-    :return paragraph_metadata: dictionary containing the metadata about the section
-    """
-
-    paragraph_metadata = {MAIN_TITLE: main_title, SUBTITLE: subtitle, SOURCE_FILE: source_file, TITLE_DEPTH: title_level, DIRECTORY: directory}
-
-    if len(links) > 0:
-        paragraph_metadata[LINKS] = {}
-        for i, link in enumerate(links):
-            paragraph_metadata[LINKS][str(i)] = link
-
-    paragraph_metadata[PARENT_TITLE] = Path(directory).parent.name
-
-    return paragraph_metadata
-
-
-def jinja_parser(filename, copy_location, options):
-    """
-    function that let's jinja do its thing to format the files except for the os-related if-statements
-
-    :param filename: the name of the file that needs to be formatted using jinja
-    :param copy_location: the location of the file that needs to be formatted using jinja
-    :param options: dictionary containing the options given by the user
-    :return:
-    """
-    # YAML file location
-    yml_file_path = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, EXTRA_DIR, 'gent.yml')
-
-    if options[VERBOSE]:
-        print("Reading YAML file from location: " + yml_file_path)
-
-    # Read the YAML file
-    with open(yml_file_path, 'r') as yml_file:
-        words_dict = yaml.safe_load(yml_file)
-
-    # ugly fix for index.md error that occurs because of the macro "config.repo_url" in mkdocs/docs/HPC/index.md
-    additional_context = {
-        'config': {
-            'repo_url': REPO_URL
-        }
-    }
-    combined_context = {**words_dict, **additional_context}
-
-    if options[VERBOSE]:
-        print("Mangling OS-specific if-statements")
-
-    # Mangle the OS-related if-statements
-    mangle_ifs(copy_location, filename, options)
-
-    if options[VERBOSE]:
-        print("Altering other if-statements to parse properly")
-
-    # Alter the other if-statements
-    alter_ifs(filename, options)
-
-    # Use Jinja2 to replace the macros
-    template_loader = ChoiceLoader([FileSystemLoader(searchpath=[os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), options[SOURCE_DIRECTORY], os.path.join(options[SOURCE_DIRECTORY], RETURN_DIR)]), FunctionLoader(load_macros)])
-    templateEnv = Environment(loader=template_loader)
-    template = templateEnv.get_template(filename)
-    rendered_content = template.render(combined_context)
-
-    if options[VERBOSE]:
-        print("jinja parsing finished\nWriting jinja-parsed file to location: " + copy_location)
-
-    # Save the rendered content to a new file
-    with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file:
-        output_file.write(rendered_content)
-
-
-def load_macros(name):
-    """
-    function used by the jinja FunctionLoader to retrieve templates from the macros folder since the normal FileSystemLoader can't locate them properly
-
-    :param name: name of the package
-    :return:
-    """
-
-    macros_location = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, MACROS)
-
-    if "../" + MACROS + "/" in name:
-        package_name = name.split("../" + MACROS + "/")[1]
-        file_location = os.path.join(macros_location, package_name)
-
-        with open(file_location, 'r') as readfile:
-            return readfile.read()
-
-
-def mangle_os_ifs(line, is_os, options):
-    """
-    function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure.
-    We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements.
-
-    :param line: the current line to check for os-related if-statements
-    :param is_os: variable keep track of the current os-state of the if-statements. Can be NON_OS_IF, NON_OS_IF_IN_OS_IF, OS_IF or OS_IF_IN_OS_IF
-        NON_OS_IF: not in an os-if
-        NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if
-        OS_IF: in an os-if
-        OS_IF_IN_OS_IF: in an os-if nested in an os-if
-    :param options: dictionary containing the options given by the user
-    :return line: the modified line with  mangled os-related if-statements
-    """
-
-    match = re.search(r'\{%(.*?)%}(.*)', line)
-
-    start_index = 0
-    added_length = 0
-
-    while match:
-
-        constr_match = re.search(r'\{%.*?%}', match.string)
-        if_match = re.search(r'if ', match.group(1))
-        if_os_match = re.search(r'if OS', match.group(1))
-        endif_match = re.search(r'endif', match.group(1))
-        else_match = re.search(r'else', match.group(1))
-
-        # mangle positions
-        pos_first_mangle = constr_match.start() + start_index + added_length + 1
-        pos_second_mangle = constr_match.end() + start_index + added_length - 1
-
-        # different parts of the original string
-        part_before_mangling = line[:pos_first_mangle]
-        part_between_mangling = line[pos_first_mangle:pos_second_mangle]
-        part_after_mangling = line[pos_second_mangle:]
-
-        # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these
-        if endif_match:
-            if is_os in (OS_IF, OS_IF_IN_OS_IF):
-                if options[VERBOSE]:
-                    print("OS-specific endif statement found in line: " + line[:-1])
-                line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling
-                added_length += 2 * len(IF_MANGLED_PART)
-                if is_os == OS_IF:
-                    is_os = NON_OS_IF
-                elif is_os == OS_IF_IN_OS_IF:
-                    is_os = OS_IF
-            elif is_os == NON_OS_IF_IN_OS_IF:
-                is_os = OS_IF
-
-        elif if_match:
-            if if_os_match:
-                if options[VERBOSE]:
-                    print("OS-specific if statement found in line:    " + line[:-1])
-                line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling
-                added_length += 2 * len(IF_MANGLED_PART)
-                if is_os == OS_IF:
-                    is_os = OS_IF_IN_OS_IF
-                else:
-                    is_os = OS_IF
-            else:
-                if is_os == OS_IF:
-                    is_os = NON_OS_IF_IN_OS_IF
-                else:
-                    is_os = NON_OS_IF
-
-        elif else_match:
-            if is_os in (OS_IF, OS_IF_IN_OS_IF):
-                if options[VERBOSE]:
-                    print("OS-specific else statement found in line:  " + line[:-1])
-                line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling
-                added_length += 2 * len(IF_MANGLED_PART)
-
-        start_index += constr_match.end()
-        match = re.search(r'\{%(.*?)%}(.*)', match.group(2))
-    return line, is_os
-
-
-def mangle_ifs(directory, filename, options):
-    """
-    function that writes the if-mangled version of a file to a location where the jinja parser will use it
-
-    :param directory: the directory of the file to be if mangled
-    :param filename: the filename of the file to be mangled
-    :param options: dictionary containing the options given by the user
-    :return:
-    """
-    # variable to keep track of latest if-statement scope
-    is_os = NON_OS_IF
-
-    with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES,  filename), 'w') as write_file:
-        with open(directory, 'r') as read_file:
-            for line in read_file:
-                new_line, is_os = mangle_os_ifs(line, is_os, options)
-                write_file.write(new_line)
-
-
-def alter_ifs(filename, options):
-    """
-    Function that further adapts the if-statements in a file and writes it to a location where the jinja parser will use it.
-    This is because the jinja parser doesn't seem to be able to handle statements like {% site == gent %} with context {'site': 'Gent'} in this case.
-    These statements get changed to {% site == 'Gent' %} in this function.
-
-    :param filename: the filename of the file to be transformed
-    :param options: dictionary containing the options given by the user
-    :return:
-    """
-
-    with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES,  filename), 'r') as read_file:
-        content = read_file.read()
-
-    pattern = r'(\{%-?\s?[a-zA-Z\s]*?[!=]=\s?\(?)([a-zA-Z\s]+(?:\sor\s[a-zA-Z\s]+)*)(\)?\s?%})'
-    content = re.sub(pattern,
-                     lambda match: (f"{match.group(1)}" +
-                                    " or ".join([f"'{city.strip().capitalize()}'" for city in match.group(2).split(" or ")]) +
-                                    f"{match.group(3)}"
-                                    ),
-                     content)
-
-    with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES,  filename), 'w') as write_file:
-        write_file.write(content)
-
-
-def make_valid_title(title):
-    """
-    function that makes sure all titles can be used as valid filenames
-
-    :param title: the string that will be used as title and filename
-    :return valid_filename: the adapted title that can be used as filename
-    """
-    # Define a regex pattern for invalid characters on both Windows and Linux
-    invalid_chars = r'[<>:"/\\|?*\0]'
-
-    # get rid of extra information between {} brackets
-    title = re.sub(r'\{.*?}', '', title)
-
-    # Remove invalid characters
-    valid_filename = re.sub(invalid_chars, '', title)
-
-    # Strip leading/trailing whitespace
-    valid_filename = valid_filename.strip().strip('-').replace(' ', '-').replace("--", "-")
-
-    return valid_filename
-
-
-def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, options, is_linux_tutorial):
-    """
-    Function that writes text and metadata of a generic (non-os-specific) file
-
-    :param title: title of section
-    :param paragraphs_text: dictionary containing all paragraphs of text
-    :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text
-    :param title_order: list containing all subtitles in order
-    :param title_order_number: order number of the title of the section that is being written
-    :param options: dictionary containing the options given by the user
-    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
-    :return:
-    """
-
-    if len(paragraphs_text[title]) > 0:
-        # make the directory needed for the files that will be written
-        filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY])
-        os.makedirs(filepath, exist_ok=True)
-
-        if options[VERBOSE]:
-            print("Writing generic section " + title + " to filepath: " + str(filepath))
-
-        write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options, is_linux_tutorial)
-    else:
-        # don't write empty files
-        pass
-
-
-def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, options, is_linux_tutorial):
-    """
-    Function to write files to a certain filepath
-
-    :param title: title of the section to be written
-    :param text: section of text to be written
-    :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text
-    :param title_order: list containing all subtitles in order
-    :param title_order_number: order number of the title of the section that is being written
-    :param filepath: filepath to write files to
-    :param OS: OS to be included in the metadata
-    :param options: dictionary containing the options given by the user
-    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
-    :return:
-    """
-
-    metadata = copy.deepcopy(paragraphs_metadata[title])
-
-    file_title = title
-
-    # write text file
-    with open(os.path.join(filepath, file_title + ".txt"), 'w') as writefile:
-        if LINKS in paragraphs_metadata[title].keys():
-            adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS], options)
-            writefile.write(adapted_text)
-        else:
-            writefile.write(text)
-
-    # write metadata
-    # check if links in metadata is not empty
-    if LINKS in metadata.keys() and len(metadata[LINKS].keys()) == 0:
-        del metadata[LINKS]
-
-    # add previous subtitle
-    if title_order_number != 0:
-        metadata[PREVIOUS_SUBTITLE] = title_order[title_order_number - 1]
-    else:
-        metadata[PREVIOUS_SUBTITLE] = None
-
-    # add next subtitle
-    if title_order_number != len(title_order) - 1:
-        metadata[NEXT_SUBTITLE] = title_order[title_order_number + 1]
-    else:
-        metadata[NEXT_SUBTITLE] = None
-
-    # add OS
-    metadata[METADATA_OS] = OS
-
-    # add reference link
-    if is_linux_tutorial:
-        linux_part = LINUX_TUTORIAL + "/"
-    else:
-        linux_part = ""
-    if OS == GENERIC:
-        os_part = ""
-    else:
-        os_part = LINK_OS[OS] + "/"
-    if "index" not in paragraphs_metadata[title][MAIN_TITLE]:
-        metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in paragraphs_metadata[title][SUBTITLE] if char.isalnum() or char == '-').strip('-')
-    else:
-        metadata[REFERENCE_LINK] = DOCS_URL
-
-    # write metadata to file
-    with open(os.path.join(filepath, file_title + METADATA_EXTENSION + ".json"), 'w') as writefile:
-        json.dump(metadata, writefile, indent=4)
-
-
-def insert_links(text, links, options):
-    """
-    Function that inserts links in the plaintext or takes out the references to the links depending on the value of INCLUDE_LINKS_IN_PLAINTEXT
-
-    :param text: The plaintext that needs to be adapted
-    :param links: The links that might need to be inserted
-    :param options: dictionary containing the options given by the user
-    :return text: The adapted plaintext
-    :return links: The links that were actually present in the text
-    """
-
-    present_links = []
-    new_links = {}
-    for link_number in re.finditer(LINK_MARKER + r'([0-9]*?)' + LINK_MARKER, text):
-        present_links.append(link_number.group(1))
-        if options[INCLUDE_LINKS_IN_PLAINTEXT]:
-            text = re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, " " + links[link_number.group(1)] + " ", text)
-        else:
-            text = re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, "", text)
-
-    for link_number in links.keys():
-        if link_number in present_links:
-            new_links[str(len(new_links.keys()))] = links[link_number]
-
-    return text, new_links
-
-
-def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata, options, is_linux_tutorial):
-    """
-    Function that splits os-specific sections into subtitles, parses them using jinja and writes them away
-
-    :param text: full os specific section
-    :param metadata: metadata generated for the full os specific section
-    :param subtitle_order: order of the subtitles generated by the splitter
-    :param title_order_number: order number of the section
-    :param all_metadata: all metadata generated by the splitter
-    :param options: dictionary containing the options given by the user
-    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
-    :return:
-    """
-
-    # Unmangle if's to use jinja parser
-    text = re.sub(IF_MANGLED_PART, "", text)
-
-    for OS in [LINUX, WINDOWS, MACOS]:
-
-        # slightly alter if-statements to be able to use predefined macros
-        text = re.sub(OS, '"' + OS + '"', text)
-
-        # Use jinja to render a different version of the text for each OS
-        template = Template(text)
-        jinja_text = template.render(OS=OS)
-
-        if len(jinja_text) != 0:
-
-            # add first subtitle in front of section again
-            if options[SPLIT_ON_TITLES] or metadata[SUBTITLE] not in make_valid_title(jinja_text[:len(metadata[SUBTITLE]) + 1]):
-                jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE].replace("-", " ") + "\n" + jinja_text
-            else:
-                jinja_text = "#" * metadata[TITLE_DEPTH] + " " + jinja_text
-
-            # re-adjust text to correct overcorrections
-            jinja_text = re.sub('"' + OS + '"', OS, jinja_text)
-
-            with open(TEMP_JINJA_FILE, 'w') as writefile:
-                writefile.write(jinja_text)
-
-            # split in right way
-            _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, is_linux_tutorial, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS)
-
-            # prepare variables to fix metadata
-            total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:]
-            all_metadata.update(os_specific_metadata)
-
-            # write to files
-            for os_i, os_subtitle in enumerate(os_subtitle_order):
-                # check that file actually has some content
-                if len(os_specific_text[os_subtitle]) > 0:
-                    # add the links to the metadata
-                    if LINKS in metadata.keys():
-                        os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS]
-
-                    # fix parent in the metadata
-                    parent_i = 0
-                    parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1
-                    parent = os_specific_metadata[os_subtitle][MAIN_TITLE]
-
-                    while total_subtitle_order[parent_i] != os_subtitle and parent_i != len(total_subtitle_order):
-                        if all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth:
-                            parent = total_subtitle_order[parent_i]
-                        parent_i += 1
-
-                    if options[SPLIT_ON_PARAGRAPHS] and parent != os_specific_metadata[os_subtitle][MAIN_TITLE]:
-                        os_specific_metadata[os_subtitle][PARENT_TITLE] = all_metadata[parent][SUBTITLE]
-                    else:
-                        os_specific_metadata[os_subtitle][PARENT_TITLE] = parent
-
-                    # fix directory in the metadata if needed
-                    if options[DEEP_DIRECTORIES]:
-                        if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]:
-                            os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE])
-                        else:
-                            os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE])
-
-                    # make a directory to save the files
-                    filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY])
-                    os.makedirs(filepath, exist_ok=True)
-
-                    if options[VERBOSE]:
-                        print("Writing os-specific section " + os_subtitle + " to filepath: " + str(filepath))
-
-                    # write to files
-                    write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options, is_linux_tutorial)
-                else:
-                    # don't write empty files
-                    pass
-        else:
-            # don't split empty texts
-            pass
-
-
-def main(options):
-    """
-    main function
-
-    :param options: dictionary containing the options specified by the user to run the script:
-                    {SOURCE_DIRECTORY: The source directory where the original files are located,
-                    DESTINATION_DIRECTORY: The destination directory where the processed files should be written to,
-                    SPLIT_ON_TITLES: boolean indicating whether to split on titles,
-                    SPLIT_ON_PARAGRAPHS: boolean indicating whether to split on paragraphs (should always be the opposite of SPLIT_ON_TITLES),
-                    MIN_PARAGRAPH_LENGTH: integer representing the minimum length of a paragraph,
-                    MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text,
-                    INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext,
-                    DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not,
-                    VERBOSE: enable or disable verbose mode}
-    :return:
-    """
-
-    if options[VERBOSE]:
-        print("Running chatbot parser with options: " + str(options))
-
-    if options[DEEP_DIRECTORIES] and options[VERBOSE]:
-        print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
-
-    # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason
-    shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS), ignore_errors=True)
-    shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True)
-    shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True)
-
-    # make the necessary directories
-    for directory in [COPIES, PARSED_MDS, IF_MANGLED_FILES]:
-        directory = os.path.join(options[DESTINATION_DIRECTORY], directory)
-        if not os.path.exists(directory):
-            os.makedirs(directory)
-
-    ################### define loop-invariant variables ###################
-
-    # constant that keeps track of the source directory
-    source_directory = options[SOURCE_DIRECTORY]
-
-    # list of all the filenames
-    filenames = {}
-    all_items = os.listdir(source_directory)
-    files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]]
-    for file in files:
-        filenames[file] = os.path.join(source_directory, file)
-
-    # for loops over all files
-    for filename in filenames.keys():
-        ################### define/reset loop specific variables ###################
-
-        # boolean indicating whether the current file is part of the linux tutorial
-        is_linux_tutorial = bool(LINUX_TUTORIAL in filenames[filename])
-
-        # make a copy of the original file in order to make sure the original does not get altered
-        copy_file = os.path.join(options[DESTINATION_DIRECTORY], COPIES, filename)
-        shutil.copyfile(filenames[filename], copy_file)
-
-        # variable that keeps track of the directories that are used to write in at different levels
-        root_dir_generic = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR)
-        root_dir_os_specific = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR)
-        root_dir_os_specific_linux = os.path.join(root_dir_os_specific, LINUX)
-        root_dir_os_specific_windows = os.path.join(root_dir_os_specific, WINDOWS)
-        root_dir_os_specific_macos = os.path.join(root_dir_os_specific, MACOS)
-
-        # variable for the main title (needed for reference links)
-        main_title = filename[:-3]
-
-        # variable that keeps track of the directories that are used to write in at different levels
-        curr_dirs = [filename[:-3] for _ in range(options[MAX_TITLE_DEPTH] + 1)]
-
-        ################### actually parse the md file ###################
-
-        if options[VERBOSE]:
-            print(LINE + "Processing " + filename)
-            print("Location: " + filenames[filename])
-            print("\nMaking directories:")
-
-        # create directories for the source markdown file
-        for directory in [root_dir_generic, root_dir_os_specific, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]:
-            if options[VERBOSE]:
-                print(directory)
-            os.makedirs(directory, exist_ok=True)
-
-        if options[VERBOSE]:
-            print("\nParsing the sourcefile with jinja")
-
-        # process the jinja macros
-        jinja_parser(filename, copy_file, options)
-
-        if options[VERBOSE]:
-            print("\nSplitting the file for the first time (split in sufficiently small generic sections and large os-specific chunks)")
-
-        # split the text in paragraphs
-        paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options, is_linux_tutorial)
-
-        if options[VERBOSE]:
-            print("\nFurther splitting os-specific chunks and writing generic and os-specific sections to files with metadata")
-
-        # for every section, either make the whole section generic, or create an os-specific file for each OS
-        for i, subtitle in enumerate(subtitle_order):
-
-            # generic
-            if subtitle in paragraphs_os_free_text.keys():
-                write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, options, is_linux_tutorial)
-
-            # os-specific
-            else:
-                split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata, options, is_linux_tutorial)
-
-        if options[VERBOSE]:
-            print("\nFinished processing " + filename)
-
-    if options[VERBOSE]:
-        print(LINE + "Cleaning up directories:")
-        print(os.path.join(options[DESTINATION_DIRECTORY], COPIES))
-        print(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES))
-        print(os.path.join(options[DESTINATION_DIRECTORY], LINUX_TUTORIAL))
-    # clean up temporary directories and files
-    shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True)
-    shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True)
-    shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], LINUX_TUTORIAL), ignore_errors=True)
-    if os.path.exists(TEMP_JINJA_FILE):
-        os.remove(TEMP_JINJA_FILE)
-
-    if options[VERBOSE]:
-        print("Parsing finished successfully")
-
-
-################### run the script ###################
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n")
-
-    # adding command-line options
-    parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located")
-    parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to")
-    parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.")
-    parser.add_argument("-pl", "--min_paragraph_length", type=int, default=512, help="Minimum length in characters of a paragraph, only works if split on titles is disabled (default: 683)")
-    parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)")
-    parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts")
-    parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is enabled")
-    parser.add_argument("-v", "--verbose", action="store_true", help="Run the script with verbose output")
-
-    args = parser.parse_args()
-
-    options_dict = {SOURCE_DIRECTORY: args.source,
-                    DESTINATION_DIRECTORY: args.destination,
-                    SPLIT_ON_TITLES: args.split_on_titles,
-                    SPLIT_ON_PARAGRAPHS: not args.split_on_titles,
-                    MIN_PARAGRAPH_LENGTH: args.min_paragraph_length,
-                    MAX_TITLE_DEPTH: args.max_title_depth,
-                    INCLUDE_LINKS_IN_PLAINTEXT: args.links,
-                    DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles,
-                    VERBOSE: args.verbose}
-
-    main(options_dict)
diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt
deleted file mode 100644
index 37137582aad..00000000000
--- a/scripts/HPC_chatbot_preprocessor/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-PyYAML==6.0.2
-Jinja2==3.1.4
-tiktoken~=0.7.0
-pathlib~=1.0.1
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt
deleted file mode 100644
index 94270ff37e3..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Main title
-This is the first paragraph of text. It is non-os-specific, however it does contain a link.
-It also contains some other Markdown syntax and an
-example code block.
-This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum
-character limit for a section).
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json
deleted file mode 100644
index 08c0b4e4973..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "tps1",
-    "subtitle": "Main-title",
-    "source_file": "tests/test_files/ftps/tps1.md",
-    "title_depth": 1,
-    "directory": "tps1",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/generic"
-    },
-    "parent_title": "",
-    "previous_title": null,
-    "next_title": "tps1_paragraph_2",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/tps1/#main-title"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt
deleted file mode 100644
index 58eedc06aa0..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-Conclusion
-Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I 
-might add to this if needed.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json
deleted file mode 100644
index 2f1ea4dcd1f..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "tps1",
-    "subtitle": "Conclusion",
-    "source_file": "tests/test_files/ftps/tps1.md",
-    "title_depth": 2,
-    "directory": "tps1",
-    "parent_title": "",
-    "previous_title": "tps1_paragraph_2",
-    "next_title": null,
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/tps1/#conclusion"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt
deleted file mode 100644
index d0ee9ce8256..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-OS specific sections
-This is the second section, it is the start of some 
-text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will
-still add a link.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json
deleted file mode 100644
index 208cb3472f4..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "tps1",
-    "subtitle": "OS-specific-sections",
-    "source_file": "tests/test_files/ftps/tps1.md",
-    "title_depth": 2,
-    "directory": "tps1",
-    "parent_title": "Main-title",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/linuxmacos"
-    },
-    "previous_title": "tps1_paragraph_1",
-    "next_title": "tps1_linux_paragraph_2.2",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#os-specific-sections"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt
deleted file mode 100644
index 1a3867e69fa..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-Non Windows section
-Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise
-section that ends right here.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json
deleted file mode 100644
index b975dfe4e03..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "tps1",
-    "subtitle": "Non-Windows-section",
-    "source_file": "tests/test_files/ftps/tps1.md",
-    "title_depth": 3,
-    "directory": "tps1",
-    "parent_title": "OS-specific-sections",
-    "previous_title": "tps1_linux_paragraph_2.1",
-    "next_title": "tps1_paragraph_3",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#non-windows-section"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt
deleted file mode 100644
index e0642d6ac96..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-OS specific sections
-This is the second section, it is the start of some 
-text specific to OSes that aren't "windows". I feel like there is no need to make this section very long, however I will
-still add a link.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json
deleted file mode 100644
index 9c605eb9004..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "tps1",
-    "subtitle": "OS-specific-sections",
-    "source_file": "tests/test_files/ftps/tps1.md",
-    "title_depth": 2,
-    "directory": "tps1",
-    "parent_title": "Main-title",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/linuxmacos"
-    },
-    "previous_title": "tps1_paragraph_1",
-    "next_title": "tps1_macos_paragraph_2.2",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#os-specific-sections"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt
deleted file mode 100644
index 1a3867e69fa..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-Non Windows section
-Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise
-section that ends right here.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json
deleted file mode 100644
index e3ca81d7cc5..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "tps1",
-    "subtitle": "Non-Windows-section",
-    "source_file": "tests/test_files/ftps/tps1.md",
-    "title_depth": 3,
-    "directory": "tps1",
-    "parent_title": "OS-specific-sections",
-    "previous_title": "tps1_macos_paragraph_2.1",
-    "next_title": "tps1_paragraph_3",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#non-windows-section"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt
deleted file mode 100644
index 9a9cbe1f3d2..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-OS specific sections
-This is the second section, it is the start of some  text specific to windows.
-In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer 
-to make sure we get a long section that is over the minimum required length for the next newline character to be 
-classified as the end of this section. I am doing this because for the next sections I want to test whether they will be
-grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, 
-let's add a link in this section as well.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json
deleted file mode 100644
index ab58c622b8c..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "tps1",
-    "subtitle": "OS-specific-sections",
-    "source_file": "tests/test_files/ftps/tps1.md",
-    "title_depth": 2,
-    "directory": "tps1",
-    "parent_title": "Main-title",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/windows"
-    },
-    "previous_title": "tps1_paragraph_1",
-    "next_title": "tps1_windows_paragraph_2.2",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#os-specific-sections"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt
deleted file mode 100644
index 6b57235f68f..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Windows specific section
-Like this.
-And this.
-And also this.
-These section should all be grouped together under the windows specific section of the output. The addition of this long
-section at the end should make sure the combination of sections comes to an end here.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json
deleted file mode 100644
index 435c9e9c484..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "tps1",
-    "subtitle": "Windows-specific-section",
-    "source_file": "tests/test_files/ftps/tps1.md",
-    "title_depth": 3,
-    "directory": "tps1",
-    "parent_title": "OS-specific-sections",
-    "previous_title": "tps1_windows_paragraph_2.1",
-    "next_title": "tps1_paragraph_3",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#windows-specific-section"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md
deleted file mode 100644
index d9b10d0c524..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md
+++ /dev/null
@@ -1,43 +0,0 @@
-# Main title
-
-This is the first paragraph of text. It is non-os-specific, however it does contain [a link](generic.md).
-It also contains some `other` *Markdown* _syntax_ and an
-```shell
-example code block.
-```
-This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum
-character limit for a section).
-
-## OS specific sections
-
-This is the second section, it is the start of some {% if OS == windows %} text specific to windows.
-In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer 
-to make sure we get a long section that is over the minimum required length for the next newline character to be 
-classified as the end of this section. I am doing this because for the next sections I want to test whether they will be
-grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, 
-let's add [a link](windows.md) in this section as well.
-
-### Windows specific section
-
-Like this.
-
-And this.
-
-And also this.
-
-These section should all be grouped together under the windows specific section of the output. The addition of this long
-section at the end should make sure the combination of sections comes to an end here.
-{% else %}
-text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will
-still add [a link](linuxmacos.md).
-
-### Non Windows section
-
-Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise
-section that ends right here.
-{% endif %}
-
-## Conclusion
-
-Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I 
-might add to this if needed.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt
deleted file mode 100644
index f62a4f31fee..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-blablabla
-blablablabla
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json
deleted file mode 100644
index b7786c066a7..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "tts1",
-    "subtitle": "Subtitle-1",
-    "source_file": "tests/test_files/ftts/tts1.md",
-    "title_depth": 2,
-    "directory": "tts1\\Main-title\\Subtitle-1",
-    "parent_title": "Main-title",
-    "previous_title": "Main-title",
-    "next_title": "Subtitle-2-g",
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/tts1/#subtitle-1"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt
deleted file mode 100644
index bdf68551202..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt
+++ /dev/null
@@ -1 +0,0 @@
-blablabla
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json
deleted file mode 100644
index eb5403804e2..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "main_title": "tts1",
-    "subtitle": "Subtitle-5-g",
-    "source_file": "tests/test_files/ftts/tts1.md",
-    "title_depth": 2,
-    "directory": "tts1\\Main-title\\Subtitle-5-g",
-    "parent_title": "Main-title",
-    "previous_title": "Subtitle-2-g",
-    "next_title": null,
-    "OS": "generic",
-    "reference_link": "https://docs.hpc.ugent.be/tts1/#subtitle-5-g"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
deleted file mode 100644
index 48125d91679..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-blablabla generic
-blablabla generic
-blablabla Linux macOS
-blablablabla Linux macOS with a link
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
deleted file mode 100644
index f7330bec86d..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "tts1",
-    "subtitle": "Subtitle-2-g",
-    "source_file": "tests/test_files/ftts/tts1.md",
-    "title_depth": 2,
-    "directory": "tts1\\Main-title\\Subtitle-2-g",
-    "parent_title": "Main-title",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/linuxmacos"
-    },
-    "previous_title": "Subtitle-1",
-    "next_title": "Subtitle-4-l&m",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/tts1/#subtitle-2-g"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
deleted file mode 100644
index b221f26074b..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-blablabla Linux macOS
-blablablabla Linux macOS
-blablabla generic with a link
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
deleted file mode 100644
index a76f852c874..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "tts1",
-    "subtitle": "Subtitle-4-l&m",
-    "source_file": "tests/test_files/ftts/tts1.md",
-    "title_depth": 3,
-    "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m",
-    "parent_title": "Subtitle-2-g",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/generic"
-    },
-    "previous_title": "Subtitle-2-g",
-    "next_title": "Subtitle-5-g",
-    "OS": "linux",
-    "reference_link": "https://docs.hpc.ugent.be/Linux/tts1/#subtitle-4-lm"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
deleted file mode 100644
index 48125d91679..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-blablabla generic
-blablabla generic
-blablabla Linux macOS
-blablablabla Linux macOS with a link
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
deleted file mode 100644
index 8b234c92fa6..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "tts1",
-    "subtitle": "Subtitle-2-g",
-    "source_file": "tests/test_files/ftts/tts1.md",
-    "title_depth": 2,
-    "directory": "tts1\\Main-title\\Subtitle-2-g",
-    "parent_title": "Main-title",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/linuxmacos"
-    },
-    "previous_title": "Subtitle-1",
-    "next_title": "Subtitle-4-l&m",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/tts1/#subtitle-2-g"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
deleted file mode 100644
index b221f26074b..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-blablabla Linux macOS
-blablablabla Linux macOS
-blablabla generic with a link
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
deleted file mode 100644
index 732d309da81..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "tts1",
-    "subtitle": "Subtitle-4-l&m",
-    "source_file": "tests/test_files/ftts/tts1.md",
-    "title_depth": 3,
-    "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m",
-    "parent_title": "Subtitle-2-g",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/generic"
-    },
-    "previous_title": "Subtitle-2-g",
-    "next_title": "Subtitle-5-g",
-    "OS": "macos",
-    "reference_link": "https://docs.hpc.ugent.be/macOS/tts1/#subtitle-4-lm"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
deleted file mode 100644
index f9f20592832..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-blablabla generic
-blablabla generic
-blablabla windows
-blablabla windows with a link
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
deleted file mode 100644
index 7a43426a85f..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "tts1",
-    "subtitle": "Subtitle-2-g",
-    "source_file": "tests/test_files/ftts/tts1.md",
-    "title_depth": 2,
-    "directory": "tts1\\Main-title\\Subtitle-2-g",
-    "parent_title": "Main-title",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/windows"
-    },
-    "previous_title": "Subtitle-1",
-    "next_title": "Subtitle-3-w",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/tts1/#subtitle-2-g"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt
deleted file mode 100644
index 0b587cef85a..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-blablabla windows
-blablablabla windows
-blablabla generic with a link
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json
deleted file mode 100644
index 4d7f494320d..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "main_title": "tts1",
-    "subtitle": "Subtitle-3-w",
-    "source_file": "tests/test_files/ftts/tts1.md",
-    "title_depth": 3,
-    "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-3-w",
-    "parent_title": "Subtitle-2-g",
-    "links": {
-        "0": "https://docs.hpc.ugent.be/generic"
-    },
-    "previous_title": "Subtitle-2-g",
-    "next_title": "Subtitle-5-g",
-    "OS": "windows",
-    "reference_link": "https://docs.hpc.ugent.be/Windows/tts1/#subtitle-3-w"
-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md
deleted file mode 100644
index 2f3ad7f9c08..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# Main title
-
-## Subtitle 1
-
-blablabla
-blablablabla
-
-## Subtitle 2 g
-
-blablabla generic
-blablabla generic
-{% if OS == windows %}blablabla windows
-blablabla windows with a [link](windows.md)
-
-### Subtitle 3 w
-
-blablabla windows
-blablablabla windows
-{% else %}blablabla Linux macOS
-blablablabla Linux macOS with a [link](linuxmacos.md)
-
-### Subtitle 4 l&m
-
-blablabla Linux macOS
-blablablabla Linux macOS
-{% endif %}
-blablabla generic with a [link](generic.md)
-
-## Subtitle 5 g
-
-blablabla
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md
deleted file mode 100644
index 6a74b3c0181..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md
+++ /dev/null
@@ -1,4 +0,0 @@
-test1: OS_IF
-{% if OS == windows %}
-test1
-{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md
deleted file mode 100644
index 2f9cdc38294..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md
+++ /dev/null
@@ -1,4 +0,0 @@
-test1: OS_IF
-{-if-% if OS == windows %-if-}
-test1
-{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md
deleted file mode 100644
index 360a4a59ba3..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md
+++ /dev/null
@@ -1,7 +0,0 @@
-test2: OS_IF in NON_OS_IF
-{% if site == Gent %}
-test2
-{% if OS == windows %}
-test2
-{% endif %}
-{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md
deleted file mode 100644
index 798dcf6db24..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md
+++ /dev/null
@@ -1,7 +0,0 @@
-test2: OS_IF in NON_OS_IF
-{% if site == Gent %}
-test2
-{-if-% if OS == windows %-if-}
-test2
-{-if-% endif %-if-}
-{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md
deleted file mode 100644
index d93125a5971..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md
+++ /dev/null
@@ -1,6 +0,0 @@
-test3: OS_IF with else
-{% if OS == linux %}
-test3
-{% else %}
-test3
-{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md
deleted file mode 100644
index 02141961338..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md
+++ /dev/null
@@ -1,6 +0,0 @@
-test3: OS_IF with else
-{-if-% if OS == linux %-if-}
-test3
-{-if-% else %-if-}
-test3
-{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md
deleted file mode 100644
index cc15fae1df1..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md
+++ /dev/null
@@ -1,4 +0,0 @@
-test4: OS_IF with wrong syntax
-{ if OS == macos }
-test4
-{ endif }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md
deleted file mode 100644
index cc15fae1df1..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md
+++ /dev/null
@@ -1,4 +0,0 @@
-test4: OS_IF with wrong syntax
-{ if OS == macos }
-test4
-{ endif }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md
deleted file mode 100644
index bdb288474e2..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md
+++ /dev/null
@@ -1,11 +0,0 @@
-test5: OS_IF in OS_IF
-{% if OS == windows %}
-test5
-{% else %}
-{% if OS == linux %}
-test5
-{% else %}
-test5
-{% endif %}
-test5
-{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md
deleted file mode 100644
index 10443eb67a4..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md
+++ /dev/null
@@ -1,11 +0,0 @@
-test5: OS_IF in OS_IF
-{-if-% if OS == windows %-if-}
-test5
-{-if-% else %-if-}
-{-if-% if OS == linux %-if-}
-test5
-{-if-% else %-if-}
-test5
-{-if-% endif %-if-}
-test5
-{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md
deleted file mode 100644
index 0731ee3588c..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md
+++ /dev/null
@@ -1,8 +0,0 @@
-test6: NON_OS_IF in OS_IF
-{% if OS == macos %}
-test6
-{% if site == Gent %}
-test6
-{% endif %}
-test6
-{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md
deleted file mode 100644
index cd37117cb00..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md
+++ /dev/null
@@ -1,8 +0,0 @@
-test6: NON_OS_IF in OS_IF
-{-if-% if OS == macos %-if-}
-test6
-{% if site == Gent %}
-test6
-{% endif %}
-test6
-{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md
deleted file mode 100644
index 6a72a338527..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md
+++ /dev/null
@@ -1,9 +0,0 @@
-test7: weird spacing and dashes
-	{%if OS == windows %}
-	test7
-{%- else%}
-	test7
-		{% if OS == linux%}
-test7
-	{%-endif %}
-{%endif%}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md
deleted file mode 100644
index dfe342ebfb1..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md
+++ /dev/null
@@ -1,9 +0,0 @@
-test7: weird spacing and dashes
-	{-if-%if OS == windows %-if-}
-	test7
-{-if-%- else%-if-}
-	test7
-		{-if-% if OS == linux%-if-}
-test7
-	{-if-%-endif %-if-}
-{-if-%endif%-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md
deleted file mode 100644
index fb8c1f8b539..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md
+++ /dev/null
@@ -1,55 +0,0 @@
-test1: OS_IF
-{% if OS == windows %}
-test1
-{% endif %}
-
-test2: OS_IF in NON_OS_IF
-{% if site == Gent %}
-test2
-{% if OS == windows %}
-test2
-{% endif %}
-{% endif %}
-
-test3: OS_IF with else
-{% if OS == linux %}
-test3
-{% else %}
-test3
-{% endif %}
-
-test4: OS_IF with wrong syntax
-{ if OS == macos }
-test4
-{ endif }
-
-test5: OS_IF in OS_IF
-{% if OS == windows %}
-test5
-{% else %}
-{% if OS == linux %}
-test5
-{% else %}
-test5
-{% endif %}
-test5
-{% endif %}
-
-test6: NON_OS_IF in OS_IF
-{% if OS == macos %}
-test6
-{% if site == Gent %}
-test6
-{% endif %}
-test6
-{% endif %}
-
-test7: weird spacing and dashes
-	{%if OS == windows %}
-	test7
-{%- else%}
-	test7
-		{% if OS == linux%}
-test7
-	{%-endif %}
-{%endif%}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md
deleted file mode 100644
index 796e94348fa..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md
+++ /dev/null
@@ -1,55 +0,0 @@
-test1: OS_IF
-{-if-% if OS == windows %-if-}
-test1
-{-if-% endif %-if-}
-
-test2: OS_IF in NON_OS_IF
-{% if site == Gent %}
-test2
-{-if-% if OS == windows %-if-}
-test2
-{-if-% endif %-if-}
-{% endif %}
-
-test3: OS_IF with else
-{-if-% if OS == linux %-if-}
-test3
-{-if-% else %-if-}
-test3
-{-if-% endif %-if-}
-
-test4: OS_IF with wrong syntax
-{ if OS == macos }
-test4
-{ endif }
-
-test5: OS_IF in OS_IF
-{-if-% if OS == windows %-if-}
-test5
-{-if-% else %-if-}
-{-if-% if OS == linux %-if-}
-test5
-{-if-% else %-if-}
-test5
-{-if-% endif %-if-}
-test5
-{-if-% endif %-if-}
-
-test6: NON_OS_IF in OS_IF
-{-if-% if OS == macos %-if-}
-test6
-{% if site == Gent %}
-test6
-{% endif %}
-test6
-{-if-% endif %-if-}
-
-test7: weird spacing and dashes
-	{-if-%if OS == windows %-if-}
-	test7
-{-if-%- else%-if-}
-	test7
-		{-if-% if OS == linux%-if-}
-test7
-	{-if-%-endif %-if-}
-{-if-%endif%-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md
deleted file mode 100644
index 1e18a1495d5..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# Title
-
-Some explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.
-
-1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list.
-
-2. Second entry
-
-3. Third entry
-
-    ![image](img/an_image_for_the_third_entry.png)
-
-4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.
-
-And now the text continues like normal in a new section.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py
deleted file mode 100644
index 91605dec651..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import pytest
-import os
-import shutil
-from chatbot_parser import main
-
-
-@pytest.mark.parametrize("input_directory,actual_output_directory,expected_output_directory, options", [
-    ("tests/test_files/ftps", "tests/test_files/ftps/actual",
-     "tests/test_files/ftps/output",
-     {"SOURCE_DIRECTORY": "tests/test_files/ftps",
-      "DESTINATION_DIRECTORY": "tests/test_files/ftps/actual",
-      "SPLIT_ON_TITLES": False,
-      "SPLIT_ON_PARAGRAPHS": True,
-      "MIN_PARAGRAPH_LENGTH": 160,
-      "MAX_TITLE_DEPTH": 4,
-      "INCLUDE_LINKS_IN_PLAINTEXT": False,
-      "DEEP_DIRECTORIES": False,
-      "VERBOSE": False}
-     ),
-    ("tests/test_files/ftts", "tests/test_files/ftts/actual",
-     "tests/test_files/ftts/output",
-     {"SOURCE_DIRECTORY": "tests/test_files/ftts",
-      "DESTINATION_DIRECTORY": "tests/test_files/ftts/actual",
-      "SPLIT_ON_TITLES": True,
-      "SPLIT_ON_PARAGRAPHS": False,
-      "MIN_PARAGRAPH_LENGTH": 160,
-      "MAX_TITLE_DEPTH": 4,
-      "INCLUDE_LINKS_IN_PLAINTEXT": False,
-      "DEEP_DIRECTORIES": True,
-      "VERBOSE": False}
-     )
-])
-def test_full_script_generated_directories(input_directory, actual_output_directory, expected_output_directory, options):
-    # run the script
-    main(options)
-
-    # Compare directories and files
-    for dirpath, dirnames, filenames in os.walk(expected_output_directory):
-        relative_path = os.path.relpath(dirpath, expected_output_directory)
-        actual_dir = os.path.join(actual_output_directory, relative_path)
-
-        # Check if the directory exists
-        assert os.path.isdir(actual_dir), f"Directory '{actual_dir}' is missing."
-
-        # Check for files
-        for filename in filenames:
-            ref_file = os.path.join(dirpath, filename)
-            gen_file = os.path.join(actual_dir, filename)
-
-            # Check if the file exists
-            assert os.path.isfile(gen_file), f"File '{gen_file}' is missing."
-
-            # Check file content
-            with open(ref_file, 'r') as ref_f, open(gen_file, 'r') as gen_f:
-                ref_content = ref_f.read().strip()
-                gen_content = gen_f.read().strip()
-                assert ref_content == gen_content, f"Content of file '{gen_file}' does not match."
-
-    # check that not too many directories have been generated
-    for dirpath, dirnames, filenames in os.walk(actual_output_directory):
-        relative_path = os.path.relpath(dirpath, actual_output_directory)
-        expected_dir = os.path.join(expected_output_directory, relative_path)
-
-        # Check if the directory exists
-        assert os.path.isdir(expected_dir), f"Directory '{relative_path}' was made, but shouldn't have been."
-
-    # remove directory
-    shutil.rmtree(actual_output_directory, ignore_errors=True)
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
deleted file mode 100644
index 4d0dd876103..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import pytest
-import os
-import shutil
-from chatbot_parser import mangle_ifs
-
-
-@pytest.mark.parametrize("input_file,output_file", [
-    ("if_mangler_1_input.md", "if_mangler_1_output.md"),
-    ("if_mangler_2_input.md", "if_mangler_2_output.md"),
-    ("if_mangler_3_input.md", "if_mangler_3_output.md"),
-    ("if_mangler_4_input.md", "if_mangler_4_output.md"),
-    ("if_mangler_5_input.md", "if_mangler_5_output.md"),
-    ("if_mangler_6_input.md", "if_mangler_6_output.md"),
-    ("if_mangler_7_input.md", "if_mangler_7_output.md")
-])
-def test_if_mangler(input_file, output_file):
-    # make directory
-    os.makedirs(os.path.join("if_mangled_files"), exist_ok=True)
-
-    # make filepaths
-    input_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", input_file)
-    expected_output_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", output_file)
-    actual_output_file_path = os.path.join("if_mangled_files", input_file)
-    mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.'})
-
-    # check every line
-    with open(expected_output_file_path, "r") as expected_read_file:
-        with open(actual_output_file_path, "r") as actual_read_file:
-            assert all([expected_line == actual_line for expected_line, actual_line in zip(expected_read_file, actual_read_file)])
-
-    # remove directory
-    shutil.rmtree("if_mangled_files", ignore_errors=True)
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py b/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py
deleted file mode 100644
index 9109f2518ad..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import pytest
-from chatbot_parser import insert_links
-
-options_include = {"INCLUDE_LINKS_IN_PLAINTEXT": True}
-options_leave_out = {"INCLUDE_LINKS_IN_PLAINTEXT": False}
-links_input = {"0": "https://first_link.com", "1": "https://second_link.be", "2": "https://docs.hpc.ugent.be/account#welcome-e-mail", "3": "https://final-link.org"}
-
-
-@pytest.mark.parametrize("text_input, options_input, text_output, new_links", [
-    # Text without links
-    # don't include links
-    ("Text without links\nand with two lines.", options_leave_out, "Text without links\nand with two lines.", {}),
-    # include links
-    ("Text without links\nand with two lines.", options_include, "Text without links\nand with two lines.", {}),
-    # Text with all links
-    # don't include links
-    ("Text with all the links\nand with multiple lines.\n§link§link§0§link§link§\n§link§link§1§link§link§\n§link§link§2§link§link§\n§link§link§3§link§link§", options_leave_out,
-     "Text with all the links\nand with multiple lines.\n\n\n\n", links_input),
-    # include links
-    ("Text with all the links\nand with multiple lines.\n§link§link§0§link§link§\n§link§link§1§link§link§\n§link§link§2§link§link§\n§link§link§3§link§link§", options_include,
-     "Text with all the links\nand with multiple lines.\n https://first_link.com \n https://second_link.be \n https://docs.hpc.ugent.be/account#welcome-e-mail \n https://final-link.org ", links_input),
-    # Text with some links
-    # don't include links
-    ("Text with all the links\nand with multiple lines.\n§link§link§1§link§link§\n§link§link§3§link§link§", options_leave_out,
-     "Text with all the links\nand with multiple lines.\n\n", {"0": "https://second_link.be", "1": "https://final-link.org"}),
-    # include links
-    ("Text with all the links\nand with multiple lines.\n§link§link§0§link§link§\n§link§link§2§link§link§", options_include,
-     "Text with all the links\nand with multiple lines.\n https://first_link.com \n https://docs.hpc.ugent.be/account#welcome-e-mail ", {"0": "https://first_link.com", "1": "https://docs.hpc.ugent.be/account#welcome-e-mail"})
-])
-def test_insert_links(text_input, options_input, text_output, new_links):
-    assert insert_links(text_input, links_input, options_input) == (text_output, new_links)
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_links.py b/scripts/HPC_chatbot_preprocessor/tests/test_links.py
deleted file mode 100644
index d1acca1d740..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_links.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import os
-import pytest
-from urllib import request
-from chatbot_parser import main
-import json
-
-whitelist = ["mailto:hpc@ugent.be"]
-slow_list = ["https://login.hpc.ugent.be", "https://www.edx.org/course/introduction-linux-linuxfoundationx-lfs101x-0"]
-
-options_general = {"SOURCE_DIRECTORY": "../../mkdocs/docs/HPC",
-                   "DESTINATION_DIRECTORY": ".",
-                   "SPLIT_ON_TITLES": False,
-                   "SPLIT_ON_PARAGRAPHS": True,
-                   "MIN_PARAGRAPH_LENGTH": 683,
-                   "MAX_TITLE_DEPTH": 4,
-                   "INCLUDE_LINKS_IN_PLAINTEXT": False,
-                   "DEEP_DIRECTORIES": False,
-                   "VERBOSE": False}
-options_os_specific = {"SOURCE_DIRECTORY": "../../mkdocs/docs/HPC/linux-tutorial",
-                       "DESTINATION_DIRECTORY": "./linux-tutorial",
-                       "SPLIT_ON_TITLES": False,
-                       "SPLIT_ON_PARAGRAPHS": True,
-                       "MIN_PARAGRAPH_LENGTH": 683,
-                       "MAX_TITLE_DEPTH": 4,
-                       "INCLUDE_LINKS_IN_PLAINTEXT": False,
-                       "DEEP_DIRECTORIES": False,
-                       "VERBOSE": False}
-
-
-@pytest.mark.parametrize("options", [options_general, options_os_specific])
-def test_all_links(options):
-    all_links = {}
-    main(options)
-    broken_links = {}
-    empty_links = {}
-
-    for (dirpath, dirnames, filenames) in os.walk(os.path.join(options['DESTINATION_DIRECTORY'], 'parsed_mds')):
-        for filename in filenames:
-            all_links[filename] = []
-            if filename.endswith('metadata.json'):
-                data = json.load(open(os.path.join(dirpath, filename)))
-                if 'links' in data.keys():
-                    for key in data['links'].keys():
-                        all_links[filename].append(data['links'][key])
-                all_links[filename].append(data['reference_link'].split("#")[0])
-
-    for filename in all_links.keys():
-        all_links[filename] = list(set(all_links[filename]))
-        for link in all_links[filename]:
-            if len(link) != 0:
-                try:
-                    if link not in whitelist and link not in slow_list:
-                        with request.urlopen(link) as res:
-                            if res.status == 200:
-                                pass
-                except:
-                    print("Broken link in " + filename + ": " + link)
-                    if filename in broken_links.keys():
-                        broken_links[filename].append(link)
-                    else:
-                        broken_links[filename] = [link]
-            else:
-                print("Empty link in " + filename)
-                if filename in empty_links.keys():
-                    empty_links[filename].append(link)
-                else:
-                    empty_links[filename] = [link]
-    assert len(empty_links.keys()) == 0
-    assert len(broken_links.keys()) == 0
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py
deleted file mode 100644
index 06e56a5cb2c..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import pytest
-from chatbot_parser import split_on_paragraphs
-
-
-@pytest.mark.parametrize("file, main_title, options, is_linux_tutorial, expected_text", [
-    ("./test_files/list_file/list_test.md",
-     "list_test.md",
-     {
-         "SOURCE_DIRECTORY": "./test_files/list_file",
-         "DESTINATION_DIRECTORY": "./test_files/list_file",
-         "SPLIT_ON_TITLES": False,
-         "SPLIT_ON_PARAGRAPHS": True,
-         "MIN_PARAGRAPH_LENGTH": 100,
-         "MAX_TITLE_DEPTH": 4,
-         "INCLUDE_LINKS_IN_PLAINTEXT": False,
-         "DEEP_DIRECTORIES": False,
-         "VERBOSE": False
-     },
-     False,
-     {
-         'list_test.md_paragraph_1': "Title\nSome explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.\n1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list.\n2. Second entry\n3. Third entry\n4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.\n",
-         'list_test.md_paragraph_2': 'And now the text continues like normal in a new section.'
-     }
-     )
-])
-def test_links(file, main_title, options, is_linux_tutorial, expected_text):
-    assert split_on_paragraphs(file, main_title, options, is_linux_tutorial)[1] == expected_text
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
deleted file mode 100644
index 225c368477d..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
+++ /dev/null
@@ -1,14 +0,0 @@
-import pytest
-from chatbot_parser import make_valid_title
-
-
-@pytest.mark.parametrize("input_string,expected", [
-    ("", ""),
-    ("A-good-filename-with-dashes", "A-good-filename-with-dashes"),
-    (" A very good filename beginning and ending in a space ", "A-very-good-filename-beginning-and-ending-in-a-space"),
-    ("-A-very-good-filename-beginning-and-ending-in-a-dash-", "A-very-good-filename-beginning-and-ending-in-a-dash"),
-    ("A filename containing bad characters <>:\"/\\|?*\0", "A-filename-containing-bad-characters"),
-    ("A filename ending with {some jinja garbage}", "A-filename-ending-with")
-])
-def test_make_valid_title(input_string, expected):
-    assert make_valid_title(input_string) == expected
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py
deleted file mode 100644
index f4cee6dd75c..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import pytest
-from chatbot_parser import replace_markdown_markers
-
-
-@pytest.mark.parametrize("input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist", [
-    # baseline test
-    ("A normal line with nothing special", [], False, "", "A normal line with nothing special", []),
-    # image 1
-    ("![image](a-nice-image.png)", [], False, "", "", []),
-    # image 2
-    ("![](img/Look-at-this-photograph.png)", [], False, "", "", []),
-    # link 1 (outside docs)
-    ("A line with a [link](a-nice-link.com)", ["another-link.be"], False, "",
-     "A line with a link§link§link§1§link§link§", ["another-link.be", "a-nice-link.com"]),
-    # link 2 (another document within the docs)
-    ("A line with a [link to the docs](account.md#welcome-e-mail)", ["another-link.be"], False, "",
-     "A line with a link to the docs§link§link§1§link§link§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]),
-    # link 3 (the same document)
-    ("A line with a [link to the same doc](#welcome-e-mail)", ["another-link.be"], False, "account.md",
-     "A line with a link to the same doc§link§link§1§link§link§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]),
-    # codeblock
-    ("```shell", [], True, "", "", []),
-    # html syntax 1 (normal syntax)
-    ("A line with something in <b>Bold</b>", [], False, "", "A line with something in Bold", []),
-    # html syntax 2 (link)
-    ("A line with another link<a href=website.com>", ["other-website.com"], False, "",
-     "A line with another link§link§link§1§link§link§", ["other-website.com", "website.com"]),
-    # html syntax 3 (style)
-    ("<p style='text-align: center'>A line with style</p>", [], False, "", "A line with style", []),
-    # Bot comment
-    ("<!--INPUT_FOR_BOTSomething about the following table-->", [], False, "", "Something about the following table", []),
-    # non-Bot comment
-    ("<!--Something else about the following table-->", [], False, "", "", []),
-    # something else with <>
-    ("A line with an example where you should put <your own input>", [], False, "", "A line with an example where you should put <your own input>", []),
-    # info/tips/warnings
-    ("!!! warning", [], False, "", " warning", []),
-    # collapsable admonitions
-    ("??? note", [], False, "", " note", []),
-    # Markdown syntax 1 (not in code block)
-    ("`Line` **with** ++a++ _lot_ *of* _++markdown++_ `syntax`", [], False, "", "Line with a lot of markdown syntax", []),
-    # Markdown syntax 2 (in code block)
-    ("`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", [], True, "", "`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", [])
-])
-def test_replace_markdown_markers(input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist):
-    assert replace_markdown_markers(input_line, input_linklist, in_code_block, main_title) == (expected_line, expected_linklist)
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py
deleted file mode 100644
index 6c30fef7985..00000000000
--- a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import pytest
-import os
-from chatbot_parser import write_metadata
-
-
-@pytest.mark.parametrize("main_title,subtitle,links,title_level,directory,source_file,output", [
-    ("", "", [], 1, "", "", {"source_file": "", "main_title": "", "subtitle": "", "title_depth": 1, "directory": "", "parent_title": ""}),
-    ("A_very_good_main_title", "An_extremely_good_subtitle", ["the_first.link", "the_second.link"], 2,
-     os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), "source",
-     {"source_file": "source", "main_title": "A_very_good_main_title", "subtitle": "An_extremely_good_subtitle", "title_depth": 2,
-      "directory": os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"),
-      "parent_title": "An_awesome_parent_file", "links": {"0": "the_first.link", "1": "the_second.link"}})
-])
-def test_write_metadata(main_title, subtitle, links, title_level, directory, source_file, output):
-    assert write_metadata(main_title, subtitle, links, title_level, directory, source_file) == output

From 445f7eec653638100120addeae1c25114e69022c Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 30 Aug 2024 14:48:58 +0200
Subject: [PATCH 145/152] Revert "removing unnecessary files"

This reverts commit 32b8b741c8582a98b122b230742e1be09ba8c698.
---
 scripts/HPC_chatbot_preprocessor/README.md    |  196 +++
 .../chatbot_parser.py                         | 1236 +++++++++++++++++
 .../HPC_chatbot_preprocessor/requirements.txt |    4 +
 .../generic/tps1/tps1_paragraph_1.txt         |    6 +
 .../tps1/tps1_paragraph_1_metadata.json       |   15 +
 .../generic/tps1/tps1_paragraph_3.txt         |    3 +
 .../tps1/tps1_paragraph_3_metadata.json       |   12 +
 .../linux/tps1/tps1_linux_paragraph_2.1.txt   |    4 +
 .../tps1_linux_paragraph_2.1_metadata.json    |   15 +
 .../linux/tps1/tps1_linux_paragraph_2.2.txt   |    3 +
 .../tps1_linux_paragraph_2.2_metadata.json    |   12 +
 .../macos/tps1/tps1_macos_paragraph_2.1.txt   |    4 +
 .../tps1_macos_paragraph_2.1_metadata.json    |   15 +
 .../macos/tps1/tps1_macos_paragraph_2.2.txt   |    3 +
 .../tps1_macos_paragraph_2.2_metadata.json    |   12 +
 .../tps1/tps1_windows_paragraph_2.1.txt       |    7 +
 .../tps1_windows_paragraph_2.1_metadata.json  |   15 +
 .../tps1/tps1_windows_paragraph_2.2.txt       |    6 +
 .../tps1_windows_paragraph_2.2_metadata.json  |   12 +
 .../tests/test_files/ftps/tps1.md             |   43 +
 .../tts1/Main-title/Subtitle-1/Subtitle-1.txt |    2 +
 .../Subtitle-1/Subtitle-1_metadata.json       |   12 +
 .../Main-title/Subtitle-5-g/Subtitle-5-g.txt  |    1 +
 .../Subtitle-5-g/Subtitle-5-g_metadata.json   |   12 +
 .../Main-title/Subtitle-2-g/Subtitle-2-g.txt  |    4 +
 .../Subtitle-2-g/Subtitle-2-g_metadata.json   |   15 +
 .../Subtitle-4-l&m/Subtitle-4-l&m.txt         |    3 +
 .../Subtitle-4-l&m_metadata.json              |   15 +
 .../Main-title/Subtitle-2-g/Subtitle-2-g.txt  |    4 +
 .../Subtitle-2-g/Subtitle-2-g_metadata.json   |   15 +
 .../Subtitle-4-l&m/Subtitle-4-l&m.txt         |    3 +
 .../Subtitle-4-l&m_metadata.json              |   15 +
 .../Main-title/Subtitle-2-g/Subtitle-2-g.txt  |    4 +
 .../Subtitle-2-g/Subtitle-2-g_metadata.json   |   15 +
 .../Subtitle-3-w/Subtitle-3-w.txt             |    3 +
 .../Subtitle-3-w/Subtitle-3-w_metadata.json   |   15 +
 .../tests/test_files/ftts/tts1.md             |   31 +
 .../if_mangler_1_input.md                     |    4 +
 .../if_mangler_1_output.md                    |    4 +
 .../if_mangler_2_input.md                     |    7 +
 .../if_mangler_2_output.md                    |    7 +
 .../if_mangler_3_input.md                     |    6 +
 .../if_mangler_3_output.md                    |    6 +
 .../if_mangler_4_input.md                     |    4 +
 .../if_mangler_4_output.md                    |    4 +
 .../if_mangler_5_input.md                     |   11 +
 .../if_mangler_5_output.md                    |   11 +
 .../if_mangler_6_input.md                     |    8 +
 .../if_mangler_6_output.md                    |    8 +
 .../if_mangler_7_input.md                     |    9 +
 .../if_mangler_7_output.md                    |    9 +
 .../if_mangler_test_files/if_mangler_input.md |   55 +
 .../if_mangler_output.md                      |   55 +
 .../tests/test_files/list_file/list_test.md   |   15 +
 .../tests/test_full_script.py                 |   68 +
 .../tests/test_if_mangler.py                  |   32 +
 .../tests/test_insert_links.py                |   31 +
 .../tests/test_links.py                       |   69 +
 .../tests/test_lists.py                       |   27 +
 .../tests/test_make_valid_title.py            |   14 +
 .../tests/test_replace_markdown_markers.py    |   46 +
 .../tests/test_write_metadata.py              |   15 +
 62 files changed, 2317 insertions(+)
 create mode 100644 scripts/HPC_chatbot_preprocessor/README.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/chatbot_parser.py
 create mode 100644 scripts/HPC_chatbot_preprocessor/requirements.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_full_script.py
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_links.py
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_lists.py
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py
 create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py

diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md
new file mode 100644
index 00000000000..6cfd9be8231
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/README.md
@@ -0,0 +1,196 @@
+# Chatbot parser
+
+`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory as input for a chatbot.
+
+## Usage
+
+The script can be run in a shell environment with the following command:
+
+```shell
+python chatbot_parser.py
+```
+
+This command has the following possible options:
+
+```shell
+chatbot_parser.py [-h] -src SOURCE -dst DESTINATION [-st] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd]
+```
+
+### Options
+
+#### `h`/`help`
+
+Display the help message
+
+#### `src`/`source`
+
+This is a required option that specifies the source directory of the input files for the script. This location is also used to look for jinja templates when using jinja to parse the source files (such as the `macros` directory within `vsc_user_docs/mkdocs/docs/HPC`).
+
+#### `dst`/`destination`
+
+This is a required option that specifies where the output of the script should be written. The script also generates extra intermediate subdirectories, so subdirectories with the following names shouldn't be present in the destination directory: `parsed_mds`, `copies` and `if_mangled_files`. If any of these pose a problem, the name of the intermediate subdirectory used for the script can be changed in the macros at the top of the script.
+
+#### `st`/`split_on_titles`
+
+Including this option will split the source files based on the titles and subtitles in the markdown text. Not including this option will split the text on paragraphs with a certain minimum length.
+
+#### `pl`/`min_paragraph_length`
+
+This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 512 tokens. This option only works if `split_on_titles` is not enabled.
+
+#### `td`/`max_title_depth`
+
+This option allows the user to configure the maximum "title depth" (the amount of `#` in front) to be used as borders between sections if `split_on_titles` is enabled. The default value is 4.
+
+#### `l`/`links`
+
+Some of the sourcefiles might contain links. Including this option will retain the links in the plaintext. If this option is not included, the links will be dropped from the plaintext.
+
+#### `dd`/`deep_directories`
+
+Including this option will make the script generate a "deep directory" where every title encountered will be made into a subdirectory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). This option only works if `split_on_titles` is enabled.
+
+## Generated file structure
+
+The generated directory structure is written as a subdirectory of `parsed_mds`. In `parsed_mds`, two subdirectories can be found: 
+
+- `generic` contains the parts of the markdown sources that were non-OS-specific
+- `os_specific` contains the parts of the markdown sources that were OS-specific
+
+Within `os_specific` a further distinction is made for each of the three possible operating systems included in the documentation.
+
+Both the generic and each of the three os-specific directories then contain a directory for each source file. 
+
+If the option `deep_directories` is not enabled, all paragraphs of the source file and their corresponding metadata will be saved in this directory. The (processed) plaintext of the paragraph is written to a `.txt` file and the metadata is written to a `.json` file.
+
+If the option `deep_directories` is enabled, the directory of each source file will contain a subdirectory structure corresponding to the structure of the subtitles at different levels in the source file. Each subtitle in the source file corresponds to a directory nested in the directory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). 
+
+Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and a `.json` file with the metadata of that section.
+
+## Requirements
+
+- The required Python packages are listed in `requirements.txt`
+
+## Restrictions on source-files
+
+Due to the nature of the script, some restrictions should be taken into account about the markdown files it can use as input.
+
+### Nested if structures
+
+The script uses the if-structures in the source-files to split the documentation into general documentation and os-specific documentation. As such it needs to keep track of which types of if-structures (os-related/non-os-related) it is reading from. When using certain nested if-structures, this will cause problems. The supported nested if-structures are determined by the macros `NON_OS_IF`, `NON_OS_IF_IN_OS_IF`, `OS_IF` and `OS_IF_IN_OS_IF`. So respectively a non-os-related if-structure, a non-os-related if nested in an os-related one, an os-related if-structure and an os-related if-structure nested in another os-related if-structure. All of these are allowed to be nested in an undetermined amount of non-os-related if-structures, but no non-os-related if structures should be nested in them. It is also not allowed to nest any of the allowed structures in more os-related if-structures. 
+
+#### Examples of valid and invalid if-structures
+
+##### Allowed
+
+###### non-os-related in os-related
+
+This is an example of one of the basic allowed if-structures (`NON_OS_IF_IN_OS_IF`)
+
+```
+if OS == windows:
+  if site == Gent:
+    ...
+  endif
+endif
+```
+
+###### os-related in os-related in non-os-related
+
+This is an example of the basic allowed if-structure `OS_IF_IN_OS_IF` nested in a non-os-specific if.
+
+```
+if site == Gent:
+  if OS == windows:
+    ...
+  else:
+    if OS == Linux:
+      ...
+    endif
+  endif
+endif
+```
+
+##### Not allowed
+
+###### non-os-related in os-related in os-related
+
+This is an example of a non-os-related if-structure nested in one of the basic allowed if-structures (`OS_IF_IN_OS_IF`).
+
+```
+if OS != windows:
+  if OS == Linux:
+    if site == Gent:
+      ...
+    endif
+  endif
+endif
+```
+
+This will result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it.
+
+###### os-related in non-os-related in os-related
+
+This is an example of the basic allowed if-structure `OS_IF` (indirectly) nested in an os-specific if-structure.
+
+```
+if OS != windows:
+  if site == Gent:
+    if OS == Linux:
+      ...
+    endif
+  endif
+endif
+```
+
+This will also result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it.
+
+### Non OS-related if-statements
+
+Due to the way jinja parses the sourcefiles, the script slightly alters non os-specific if-statements as well. It expects if-statements of the following form:
+
+```
+{%- if site == gent %}
+{% if site != (gent or brussel) %}
+```
+
+All spaces and the dash are optional. City names don't need to be fully lowercase since the parser will capitalize them properly anyway.
+
+### html syntax
+
+The input shouldn't contain any html syntax. While some failsafes are in place, the script isn't made with the use case of handling html syntax in mind. 
+
+### Comments
+
+Any comments within the markdown files (for example TODO's) should follow the following syntax:
+
+```
+<!--your comment-->
+```
+ and should be limited to one line.
+
+Comments can be written in such a way that the script will keep them as input for the bot. To do that, the marker `INPUT_FOR_BOT` should be put in front of the content of the comment as such.
+
+```
+<!--INPUT_FOR_BOT: your comment for the bot-->
+```
+
+This will be reworked to
+ 
+```
+your comment for the bot
+```
+
+in the final output.
+
+### Long filepaths
+
+Due to the nature of this script, it can generate large directories with very long names if `deep_directories` is enabled. Depending on the operating system, this can cause problems with filepaths being too long, resulting in files not being able to open. A possible fix for this is to make sure the filepath to where the script is located is not too long. Another solution is lowering the `max_title_depth` or disabling `deep_directories`.
+
+### Markdown lists
+
+The parser is made in a way to detect lists and not split them in multiple paragraphs. The kinds of lists it can detect are all lists with markers `-`, `+`, `*` and lists indexed with numbers or letters (one letter per list entry). It can handle list entries being spread out over multiple lines if there is an indentation of at least two spaces. It can also handle multiple paragraph list entries in this way, as long as the indentation stays.
+
+### Links
+
+Part of the metadata of the parser are links. In order for the links to be built up in the right way, links to external sites should always start with either `https://` or `http://`.
diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
new file mode 100644
index 00000000000..24e0b287a0a
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -0,0 +1,1236 @@
+#!/usr/bin/env python3
+
+import argparse
+import copy
+import json
+import os
+import re
+import shutil
+import tiktoken
+import yaml
+from itertools import chain, tee, zip_longest
+from pathlib import Path
+from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template
+
+#################### define macros (module-level constants) ####################
+# keys of the options dictionary that holds the settings given by the user
+SOURCE_DIRECTORY = "SOURCE_DIRECTORY"
+DESTINATION_DIRECTORY = "DESTINATION_DIRECTORY"
+SPLIT_ON_TITLES = "SPLIT_ON_TITLES"
+MIN_PARAGRAPH_LENGTH = "MIN_PARAGRAPH_LENGTH"
+MAX_TITLE_DEPTH = "MAX_TITLE_DEPTH"
+INCLUDE_LINKS_IN_PLAINTEXT = "INCLUDE_LINKS_IN_PLAINTEXT"
+SPLIT_ON_PARAGRAPHS = "SPLIT_ON_PARAGRAPHS"
+DEEP_DIRECTORIES = "DEEP_DIRECTORIES"
+VERBOSE = "VERBOSE"
+
+# directory names (used as components of paths)
+PARSED_MDS = "parsed_mds"
+COPIES = "copies"
+IF_MANGLED_FILES = "if_mangled_files"
+LINUX_TUTORIAL = "linux-tutorial"
+RETURN_DIR = ".."
+MKDOCS_DIR = "mkdocs"
+DOCS_DIR = "docs"
+HPC_DIR = "HPC"
+EXTRA_DIR = "extra"
+GENERIC_DIR = "generic"
+OS_SPECIFIC_DIR = "os_specific"
+MACROS = "macros"
+
+# OSes (GENERIC is the default value of the OS parameter of the split functions)
+LINUX = "linux"
+WINDOWS = "windows"
+MACOS = "macos"
+GENERIC = "generic"
+LINK_OS = {LINUX: "Linux", WINDOWS: "Windows", MACOS: "macOS"}  # OS needs different capitalisation for use in links
+
+# urls
+REPO_URL = 'https://github.com/hpcugent/vsc_user_docs'
+DOCS_URL = "https://docs.hpc.ugent.be"
+
+# OS-related if-states
+ACTIVE = "active"
+INACTIVE = "inactive"
+
+# if mangler states (the supported kinds of nested if-structures, see the README)
+NON_OS_IF = 0
+NON_OS_IF_IN_OS_IF = 1
+OS_IF = 2
+OS_IF_IN_OS_IF = 3
+
+# marker that is part of every if-mangled statement (see IF_MANGLED_PATTERNS)
+IF_MANGLED_PART = "-if-"
+
+# actions
+DONE = "done"
+WRITE_TEXT = "write_text"
+CHECK_EXTRA_MESSAGE = "check_extra_message"
+WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE = "write_text_and_check_extra_message"
+
+# Metadata attributes
+SOURCE_FILE = "source_file"
+MAIN_TITLE = "main_title"
+SUBTITLE = "subtitle"
+TITLE_DEPTH = "title_depth"
+DIRECTORY = "directory"
+LINKS = "links"
+PARENT_TITLE = "parent_title"
+PREVIOUS_SUBTITLE = "previous_title"
+NEXT_SUBTITLE = "next_title"
+METADATA_OS = "OS"
+REFERENCE_LINK = "reference_link"
+
+# if-structure components (used as the keys of IF_MANGLED_PATTERNS)
+IF = "if"
+ELSE = "else"
+ENDIF = "endif"
+
+# link indicator: replace_markdown_markers puts the index of a link in the link list between two of these markers
+LINK_MARKER = r'§link§link§'
+
+# HTML tags that are dropped from the text by replace_markdown_markers
+HTML_TAGS = ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"]  # make sure these are always lowercase (they are compared to the lowercased tag content); NOTE(review): "p" is listed twice
+
+# regex patterns that match the if-mangled if (OS-related only), else and endif statements
+IF_MANGLED_PATTERNS = {
+        IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})',
+        ELSE: r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})',
+        ENDIF: r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})'
+    }
+
+# filenames (and parts of filenames)
+TEMP_JINJA_FILE = "jinja_file.txt"
+_PARAGRAPH_ = "_paragraph_"
+METADATA_EXTENSION = "_metadata"
+
+# Marker for comments that should be kept as input for the bot (see the README)
+INPUT_FOR_BOT = "INPUT_FOR_BOT: "
+
+# Standard strings for verbose output
+LINE = "------------------------------------------------------------------------------------------------------\n"
+
+
+################### define functions ###################
+
+def check_for_title(line, in_code_block, curr_dirs, options):
+    """
+    function that checks for titles in the current line. Used by split_on_titles and split_on_paragraphs to split the text among the subtitles
+
+    :param line: the current line to be checked for a title
+    :param in_code_block: boolean indicating whether the current line is part of a codeblock to be sure comments aren't counted as titles
+    :param curr_dirs: the current working directories for each level of subtitle, updated in place when a new title is found and deep directories are enabled
+    :param options: dictionary containing the options given by the user
+    :return title_length: The amount of hashtags in front of the title on the current line, 0 if the line is not a title (in a codeblock or deeper than the maximum title depth)
+    """
+    match = re.match(r'#+ ', line)
+    if not match or in_code_block or len(match.group(0)) > options[MAX_TITLE_DEPTH] + 1:
+        return 0
+
+    title_length = len(match.group(0)) - 1
+    if options[DEEP_DIRECTORIES]:
+        # only strip the line ending, so a title on a last line without trailing newline keeps its final character
+        title_text = line[title_length + 1:].rstrip('\n')
+        curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(title_text.replace(' ', '-')))
+        # update the higher order current directories so deeper titles are nested in the title that was just found
+        for i in range(title_length + 1, options[MAX_TITLE_DEPTH] + 1):
+            curr_dirs[i] = curr_dirs[title_length]
+
+    return title_length
+
+
+def make_valid_link(link, main_title, is_linux_tutorial):
+    """
+    Function that converts a string to a valid link to be used in the metadata
+
+    :param link: the input string to be turned into a valid link
+    :param main_title: the main title of the file that contains the link
+    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
+    :return link: the valid link (links starting with http://, https:// or mailto: are returned unchanged)
+    """
+    # external links are already valid; returning early keeps the 'index' clean-up at the end from mangling them
+    if link.startswith(('http://', 'https://', 'mailto:')):
+        return link
+
+    # ugly fix for problem with links: links to linux tutorial files get the linux-tutorial/ prefix in their path
+    linux_tutorial_files = ["beyond_the_basics", "common_pitfalls", "getting_started", "hpc_infrastructure", "index", "manipulating_files_and_directories", "navigating", "uploading_files"]
+    if is_linux_tutorial and any(name in link for name in linux_tutorial_files):
+        linux_part = LINUX_TUTORIAL + '/'
+    else:
+        linux_part = ""
+
+    # drop the relative prefix: only the leading './' is cut off, so a './' further on in the link (e.g. inside '../') stays intact
+    if link.startswith("./"):
+        link = link[2:]
+    if link.startswith("../"):
+        link = link.replace('../', '')
+
+    if link.startswith("#"):
+        link = DOCS_URL + '/' + linux_part + main_title + "/" + link
+    elif link.endswith(".md") and ("/" not in link or "." not in link.split("/")[0]):
+        link = DOCS_URL + '/' + linux_part + link.replace(".md", "")
+    elif '.md#' in link:
+        link = DOCS_URL + '/' + linux_part + link.replace(".md", "/")
+    else:
+        link = DOCS_URL + '/' + linux_part + link
+
+    # strip 'index' path components (presumably because index pages are served at the url of their directory - TODO confirm)
+    return link.replace('index/', '').replace('/index', '')
+
+
+def replace_markdown_markers(curr_line, linklist, in_code_block, main_title, is_linux_tutorial):
+    """
+    function that replaces certain markdown structures with the equivalent used on the website
+
+    :param curr_line: the current line on which markdown structures need to be replaced
+    :param linklist: the list used to store links that need to be printed at the end of the file (appended to in place)
+    :param in_code_block: boolean indicating whether the current line is part of a code block
+    :param main_title: the main title of the file that is being processed
+    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
+    :return curr_line: the adapted current line
+    :return linklist: the updated linklist
+    """
+
+    # replace images with an empty line
+    if re.search(r'(?i)!\[image]\(.*?\)', curr_line) or re.search(r'!\[.*?]\(img/.*?\.png\)', curr_line):
+        curr_line = ""
+
+    # replace links with a reference: the link text followed by the index of the link in linklist between two LINK_MARKERs
+    matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line)
+    if matches:
+        for match in matches:
+            curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_MARKER + str(len(linklist)) + LINK_MARKER)
+
+            linklist.append(make_valid_link(match[1], main_title, is_linux_tutorial))
+
+    # codeblock (with ``` -> always stands on a separate line, so line can be dropped)
+    if '```' in curr_line:
+        curr_line = ""
+
+    # structures within <> (found tags are replaced literally, so regex characters in them, like the '?' in a url, can't break the replacement)
+    match = re.findall(r'<(.*?)>', curr_line)
+    if match:
+        html_tags_variations = list(chain.from_iterable([[element, element + "/", "/" + element] for element in HTML_TAGS]))
+        html_tags_style = [element + " style=.*" for element in HTML_TAGS]
+        for content in match:
+
+            # add references for every link of format <a href=...>
+            if re.search(r'a href=.*', content):
+                link = content[7:]  # drops the leading 'a href=' (7 characters); quotes around the url are kept
+                curr_line = curr_line.replace(f'<{content}>', LINK_MARKER + str(len(linklist)) + LINK_MARKER)
+                linklist.append(link)
+
+            # drop the syntax words
+            elif content.lower() in html_tags_variations:
+                curr_line = curr_line.replace(f'<{content}>', "")
+
+            # drop the version of the HTML_TAGS followed by " style=" (this drops every <...> structure on the line)
+            elif any(re.match(pattern, content) for pattern in html_tags_style):
+                curr_line = re.sub(r'<.*?>', "", curr_line)
+
+            # keep comments for bot
+            elif re.fullmatch(r'!--' + INPUT_FOR_BOT + r'.*?--', content):
+                curr_line = re.sub(r'<!--' + INPUT_FOR_BOT + r'(.*?)-->', lambda m: m.group(1), curr_line)
+
+            # drop comments (this drops every <...> structure on the line)
+            elif re.fullmatch(r'!--.*?--', content):
+                curr_line = re.sub(r'<.*?>', "", curr_line)
+
+            # drop the <> around links
+            elif re.match(r'http://', content) or re.match(r'https://', content):
+                curr_line = curr_line.replace('<' + content + '>', content)
+
+            # keep the rest
+            else:
+                pass
+
+    # structures with !!! (info, tips, warnings)
+    if '!!!' in curr_line:
+        curr_line = re.sub(r'!!!', "", curr_line)
+
+    # structures with ??? (collapsable admonitions)
+    if '???' in curr_line:
+        curr_line = re.sub(r'\?\?\?', "", curr_line)
+
+    # get rid of other indicators (`, *, +, _); the found text is escaped in patterns and never used as a replacement template
+    if not in_code_block:
+
+        backquotes = re.findall(r'`(.*?)`', curr_line)
+        if backquotes:
+            for content in backquotes:
+                curr_line = curr_line.replace(f"`{content}`", content)
+
+        asterisks = re.findall(r'(?<!\\)(\*+)(.+?)\1', curr_line)
+        if asterisks:
+            for content in asterisks:
+                curr_line = re.sub(r"(\*+)" + re.escape(content[1]) + r"\1", lambda _, text=content[1]: text, curr_line)
+
+        pluses = list(set(re.findall(r'\+\+([^ ]+?)\+\+', curr_line) + re.findall(r'\+\+(".+?")\+\+', curr_line)))
+        if pluses:
+            for content in pluses:
+                curr_line = curr_line.replace("++" + content + "++", content)
+
+        underscores = re.findall(r' (_+)(.+?)\1 ', curr_line)
+        if underscores:
+            for content in underscores:
+                curr_line = re.sub(r"(_+)" + re.escape(content[1]) + r"\1", lambda _, text=content[1]: text, curr_line)
+
+    return curr_line, linklist
+
+
+def split_text(file, main_title, options, is_linux_tutorial, current_paragraph_number=-1, OS=GENERIC):
+    """
+    Function that splits the text into smaller sections by handing it to the split function selected in the options (titles take precedence over paragraphs)
+
+    :param file: the filepath of the file to be split
+    :param main_title: the main title of the file
+    :param options: dictionary containing the options given by the user
+    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
+    :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph on paragraph level
+    :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph on paragraph level
+    :return: the return value of the selected split function; for split_on_titles this is the tuple
+             (paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order).
+             None is returned when neither SPLIT_ON_TITLES nor SPLIT_ON_PARAGRAPHS is set in the options
+    """
+    if options[SPLIT_ON_TITLES]:
+        return split_on_titles(file, main_title, options, is_linux_tutorial)
+    if options[SPLIT_ON_PARAGRAPHS]:
+        return split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_paragraph_number, OS)
+    return None
+
+
+def split_on_titles(file, main_title, options, is_linux_tutorial):
+    """
+    Function that splits the text into smaller sections based on the subtitle structure and makes them into dictionaries containing text and metadata
+
+    :param file: the filepath of the file to be split
+    :param main_title: the main title of the file
+    :param options: dictionary containing the options given by the user
+    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
+    :return paragraphs_os_text: dictionary (title -> text) containing the split sections that have lines inside an if-mangled OS if-statement
+    :return paragraphs_os_free_text: dictionary (title -> text) containing the split sections without such lines
+    :return paragraphs_metadata, subtitle_order: dictionary (title -> metadata) of all split sections, and the list containing all encountered subtitles in order of appearance
+    """
+
+    if options[VERBOSE]:
+        print("Splitting on titles\n")
+
+    # start off assuming we haven't encountered a title (lines outside if-statements are skipped until the first title is found)
+    after_first_title = False
+
+    # start off assuming we are not in a code_block
+    in_code_block = False
+
+    # define initial dictionaries
+    paragraphs_os_free_text = {}
+    paragraphs_os_text = {}
+    paragraphs_metadata = {}
+
+    # variable to keep track of the text of the current section
+    current_paragraph = ""
+
+    # list to keep track of links in the text of the current section
+    link_list = []
+
+    # list to keep track of the order of the subtitles
+    subtitle_order = []
+
+    # variable to keep track of how many if-statements deep the current line is
+    in_if_statement = 0
+
+    # variable to indicate that the current section contains lines inside an if-statement
+    previous_contained_if = False
+
+    # list to keep track of most recent directories on each title level
+    curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)]
+
+    with open(file, 'r') as readfile:
+
+        for line in readfile:
+
+            # detect if-statements starting or ending on the current line
+            in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len(re.findall(IF_MANGLED_PATTERNS[ENDIF], line))
+
+            # detect codeblocks to make sure titles aren't detected in them
+            if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
+                in_code_block = not in_code_block
+                if options[VERBOSE]:
+                    if in_code_block:
+                        print("Detected start of a codeblock, not registering titles")
+                    else:
+                        print("Detected end of codeblock, registering titles again")
+
+            # only split up if current line is in a fully non-os-specific section
+            if in_if_statement == 0:
+
+                title_level = check_for_title(line, in_code_block, curr_dirs, options)
+
+                # line is a title of at most the configured maximum title depth
+                if title_level > 0:
+                    if after_first_title:
+
+                        # write text of the previous section
+                        if previous_contained_if:
+                            paragraphs_os_text[title] = current_paragraph
+                            if options[VERBOSE]:
+                                print("Saved os-specific chunk with temporary title: " + title + "\n")
+                        else:
+                            paragraphs_os_free_text[title] = current_paragraph
+                            if options[VERBOSE]:
+                                print("Saved generic chunk with title: " + title + "\n")
+
+                        # write metadata of the previous section
+                        paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir, options[SOURCE_DIRECTORY] + '/' + main_title + '.md')
+
+                    # make a new title (the slice drops the '#'s, the space after them and the last character of the line, i.e. the newline)
+                    title = make_valid_title(line[title_level + 1:-1])
+
+                    # start collecting the text of the new section
+                    current_paragraph = ""
+
+                    after_first_title = True
+                    subtitle_order.append(title)
+
+                    # reset link_list
+                    link_list = []
+
+                    previous_contained_if = False
+
+                # line is not a title
+                elif after_first_title:
+                    line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial)
+                    if line != "\n":
+                        current_paragraph += line
+
+                # keep track of title level and directory to write to metadata upon discovering a new subtitle
+                if title_level > 0:
+                    last_title_level = title_level
+                    last_dir = curr_dirs[last_title_level]
+            else:
+                previous_contained_if = True
+                line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial)
+                if line != "\n":
+                    current_paragraph += line
+
+    # write dictionaries for the last section; NOTE(review): title and last_title_level are only assigned once a title was found, so this presumably fails for a file without titles - TODO confirm
+    if previous_contained_if:
+        paragraphs_os_text[title] = current_paragraph
+        if options[VERBOSE]:
+            print("Saved os-specific chunk with temporary title: " + title + "\n")
+    else:
+        paragraphs_os_free_text[title] = current_paragraph
+        if options[VERBOSE]:
+            print("Saved generic chunk with title: " + title + "\n")
+    paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, curr_dirs[last_title_level], options[SOURCE_DIRECTORY] + '/' + main_title + '.md')
+
+    return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order
+
+
+def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_paragraph_number=-1, OS=GENERIC):
+    """
+    Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata
+
+    :param file: the filepath of the file to be split
+    :param main_title: the main title of the file
+    :param options: dictionary containing the options given by the user
+    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
+    :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph
+    :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph
+    :return paragraphs_text: dictionary containing the split sections of text
+    :return paragraphs_metadata: dictionary containing the metadata of each split section of text
+    :return subtitle_order: list containing all encountered subtitles in order of appearance
+    """
+
+    if options[VERBOSE]:
+        print("Splitting on paragraphs\n")
+
+    # start of assuming we are not in a code_block
+    in_code_block = False
+
+    # define initial dictionaries
+    paragraphs_os_free_text = {}
+    paragraphs_os_text = {}
+    paragraphs_metadata = {}
+
+    # variable to keep track of the current paragraph
+    current_paragraph = ""
+
+    # list to keep track of links in the text
+    link_list = []
+
+    # list to keep track of the order of the subtitles
+    subtitle_order = []
+
+    # variable to keep track of how many if-statements deep the current line is
+    in_if_statement = 0
+
+    # variable to indicate that previous section was one with if-statements
+    previous_contained_if = False
+
+    # variable to indicate that the previous line was part of a list
+    in_list = False
+
+    # paragraph number to add to title
+    paragraph_number = 1
+
+    # metadata title
+    metadata_title = main_title
+
+    # define metadata data if split occurs on paragraphs and last_title and title_level are known (will be replaced later on in the process)
+    if current_paragraph_number != -1:
+        last_title_level = 4
+        last_dir = "PLACEHOLDER"
+
+    # list to keep track of most recent directories on each title level
+    curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)]
+
+    with open(file, 'r') as readfile:
+
+        # Create two independent iterators from the original file iterator (needed to check for lists)
+        current_line, next_line = tee(readfile)
+
+        # Advance the next_line iterator by one step, so it is always one step ahead
+        next(next_line, None)
+
+        # Process the lines
+        for line, nxt in zip_longest(current_line, next_line, fillvalue=""):
+
+            # detect if-statements starting or ending on the current line
+            in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len(
+                re.findall(IF_MANGLED_PATTERNS[ENDIF], line))
+
+            # detect whether the current line is in a list
+            if re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', line):  # beginning of a list entry
+                in_list = True
+                if options[VERBOSE]:
+                    print("First line of new list entry found, not starting new paragraphs: " + line[:-1])
+            elif re.search(r'^\s{2,}.+$', line) and in_list:  # middle of a list entry
+                pass
+            elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list:  # line(s) between list entries
+                pass
+            elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', nxt):
+                in_list = True
+            elif in_list:
+                if options[VERBOSE]:
+                    print("List ended, starting new paragraphs again")
+                in_list = False
+            else:
+                in_list = False
+
+            # detect codeblocks to make sure titles aren't detected in them
+            if '```' in line or (('<pre><code>' in line) ^ ('</code></pre>' in line)):
+                in_code_block = not in_code_block
+                if options[VERBOSE]:
+                    if in_code_block:
+                        print("Detected start of a codeblock, not starting new paragraphs")
+                    else:
+                        print("Detected end of codeblock, starting new paragraphs again")
+
+            # only split up if current line is in a fully non-os-specific section
+            if in_if_statement == 0:
+
+                title_level = check_for_title(line, in_code_block, curr_dirs, options)
+
+                # check whether a new paragraph should be started
+                if line == "\n" and paragraph_long_enough(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph), options) and not in_code_block and not in_list:
+
+                    # create a title for the previous paragraph
+                    if current_paragraph_number == -1:
+                        paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number)
+                    else:
+                        paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number)
+                    paragraph_number += 1
+
+                    # write text of previous file
+                    if previous_contained_if:
+                        paragraphs_os_text[paragraph_title] = current_paragraph
+                        if options[VERBOSE]:
+                            print("Saved os-specific chunk with temporary title: " + paragraph_title + "\n")
+                    else:
+                        paragraphs_os_free_text[paragraph_title] = current_paragraph
+                        if options[VERBOSE]:
+                            print("Saved generic chunk with title: " + paragraph_title + "\n")
+
+                    # write metadata of previous file
+                    paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir, source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md')
+                    subtitle_order.append(paragraph_title)
+
+                    # reset the current paragraph
+                    current_paragraph = ""
+
+                    # reset link_list
+                    link_list = []
+
+                    previous_contained_if = False
+
+                # line is a title with a maximum depth of 4
+                elif title_level > 0:
+
+                    # make a new title
+                    metadata_title = make_valid_title(line[title_level + 1:-1])
+
+                    line, link_list = replace_markdown_markers(line[title_level + 1:], link_list, in_code_block, main_title, is_linux_tutorial)
+                    current_paragraph += line
+
+                # line is not a title or the beginning of a new paragraph
+                elif line != "\n" or previous_contained_if:
+                    line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial)
+                    current_paragraph += line
+
+                # keep track of title level and directory to write to metadata upon discovering a new subtitle
+                if title_level > 0:
+                    last_title_level = title_level
+                    last_dir = curr_dirs[last_title_level]
+            else:
+                previous_contained_if = True
+                line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial)
+                current_paragraph += line
+
+    # create a title for the last paragraph
+    if current_paragraph_number == -1:
+        paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number)
+    else:
+        paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number)
+
+    # write dictionaries for the last file
+    if previous_contained_if:
+        paragraphs_os_text[paragraph_title] = current_paragraph
+        if options[VERBOSE]:
+            print("Saved os-specific chunk with temporary title: " + paragraph_title + "\n")
+    else:
+        paragraphs_os_free_text[paragraph_title] = current_paragraph
+        if options[VERBOSE]:
+            print("Saved generic chunk with title: " + paragraph_title + "\n")
+    paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level], source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md')
+    subtitle_order.append(paragraph_title)
+
+    return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order
+
+
+def paragraph_long_enough(paragraph, options):
+    """
+    Function that checks whether the paragraph is long enough to be split off
+
+    The length is measured in tokens of the "cl100k_base" tiktoken encoding, not in characters.
+
+    :param paragraph: current paragraph
+    :param options: dictionary containing the options given by the user (MIN_PARAGRAPH_LENGTH is read here)
+    :return: True if the paragraph contains at least options[MIN_PARAGRAPH_LENGTH] tokens, False otherwise
+    """
+    encoding = tiktoken.get_encoding("cl100k_base")
+
+    # disallowed_special=() makes tiktoken encode text that looks like a special token (e.g. "<|endoftext|>")
+    # as ordinary text instead of raising a ValueError; only the amount of tokens matters here
+    token_amount = len(encoding.encode(paragraph, disallowed_special=()))
+
+    return token_amount >= options[MIN_PARAGRAPH_LENGTH]
+
+
+def write_metadata(main_title, subtitle, links, title_level, directory, source_file):
+    """
+    Function that collects the metadata of a text section in a dictionary
+
+    :param main_title: The main title of the file containing the section
+    :param subtitle: the title of the section
+    :param links: a list of links contained within the section
+    :param title_level: the depth of the title of the section
+    :param directory: the directory where the section will eventually be written (can either be generic or os-specific)
+    :param source_file: the source file that the section originates from
+    :return section_metadata: dictionary containing the metadata about the section
+    """
+
+    section_metadata = {
+        MAIN_TITLE: main_title,
+        SUBTITLE: subtitle,
+        SOURCE_FILE: source_file,
+        TITLE_DEPTH: title_level,
+        DIRECTORY: directory,
+    }
+
+    # the links entry only exists when the section contains links, every link is keyed by its index (as a string)
+    if links:
+        section_metadata[LINKS] = {str(index): link for index, link in enumerate(links)}
+
+    # the parent title is the name of the directory that contains the directory of the section
+    section_metadata[PARENT_TITLE] = Path(directory).parent.name
+
+    return section_metadata
+
+
+def jinja_parser(filename, copy_location, options):
+    """
+    function that lets jinja do its thing to format the files except for the os-related if-statements
+
+    The file at copy_location is first written in if-mangled form to the IF_MANGLED_FILES directory, the other
+    if-statements in that mangled file are altered, the result is rendered with jinja and the rendered content
+    finally overwrites the file at copy_location.
+
+    :param filename: the name of the file that needs to be formatted using jinja
+    :param copy_location: the location of the file that needs to be formatted using jinja (it is overwritten with the rendered content)
+    :param options: dictionary containing the options given by the user
+    :return:
+    """
+    # YAML file location
+    # NOTE(review): this path is built from RETURN_DIR and presumably depends on the current working directory - confirm
+    yml_file_path = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, EXTRA_DIR, 'gent.yml')
+
+    if options[VERBOSE]:
+        print("Reading YAML file from location: " + yml_file_path)
+
+    # Read the YAML file (explicitly as UTF-8 so the result does not depend on the locale of the machine)
+    with open(yml_file_path, 'r', encoding='utf-8') as yml_file:
+        words_dict = yaml.safe_load(yml_file)
+
+    # ugly fix for index.md error that occurs because of the macro "config.repo_url" in mkdocs/docs/HPC/index.md
+    additional_context = {
+        'config': {
+            'repo_url': REPO_URL
+        }
+    }
+    combined_context = {**words_dict, **additional_context}
+
+    if options[VERBOSE]:
+        print("Mangling OS-specific if-statements")
+
+    # Mangle the OS-related if-statements
+    mangle_ifs(copy_location, filename, options)
+
+    if options[VERBOSE]:
+        print("Altering other if-statements to parse properly")
+
+    # Alter the other if-statements
+    alter_ifs(filename, options)
+
+    # Use Jinja2 to replace the macros
+    # templates are looked up in the if-mangled files first, then in the source directory and its parent;
+    # load_macros is only consulted when none of these locations contains the requested template
+    search_paths = [os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), options[SOURCE_DIRECTORY], os.path.join(options[SOURCE_DIRECTORY], RETURN_DIR)]
+    template_loader = ChoiceLoader([FileSystemLoader(searchpath=search_paths), FunctionLoader(load_macros)])
+    template_env = Environment(loader=template_loader)
+    template = template_env.get_template(filename)
+    rendered_content = template.render(combined_context)
+
+    if options[VERBOSE]:
+        print("jinja parsing finished\nWriting jinja-parsed file to location: " + copy_location)
+
+    # Save the rendered content to a new file
+    with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file:
+        output_file.write(rendered_content)
+
+
+def load_macros(name):
+    """
+    function handed to the jinja FunctionLoader to retrieve templates from the macros folder since the normal FileSystemLoader can't locate them properly
+
+    :param name: name of the template that is requested
+    :return: the content of the requested macro file, or None when the name doesn't point to the macros folder
+    """
+
+    macros_marker = "../" + MACROS + "/"
+
+    # names that don't refer to the macros folder are not handled by this function
+    if macros_marker not in name:
+        return None
+
+    requested_file = name.split(macros_marker)[1]
+    macro_path = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, MACROS, requested_file)
+
+    with open(macro_path, 'r') as macro_file:
+        return macro_file.read()
+
+
+def mangle_os_ifs(line, is_os, options):
+    """
+    function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure.
+    We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements.
+
+    Mangling a statement means inserting IF_MANGLED_PART right after the opening "{" and right before the closing "}" of the {% ... %} construction.
+
+    :param line: the current line to check for os-related if-statements
+    :param is_os: variable keep track of the current os-state of the if-statements. Can be NON_OS_IF, NON_OS_IF_IN_OS_IF, OS_IF or OS_IF_IN_OS_IF
+        NON_OS_IF: not in an os-if
+        NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if
+        OS_IF: in an os-if
+        OS_IF_IN_OS_IF: in an os-if nested in an os-if
+    :param options: dictionary containing the options given by the user (only VERBOSE is read here)
+    :return line: the modified line with mangled os-related if-statements
+    :return is_os: the os-state after processing the line, to be passed along when processing the next line
+    """
+
+    # first {% ... %} construction in the line; group(1) is its content, group(2) the rest of the line behind it
+    match = re.search(r'\{%(.*?)%}(.*)', line)
+
+    # offset in the original line of the part that still has to be searched
+    start_index = 0
+    # amount of characters that have been inserted into the line so far
+    added_length = 0
+
+    while match:
+
+        # position of the construction within the string that was searched (the full line or the remainder of it)
+        constr_match = re.search(r'\{%.*?%}', match.string)
+        # NOTE(review): an "elif" tag also matches the 'if ' pattern and is handled as a new if-statement - confirm this is intended
+        if_match = re.search(r'if ', match.group(1))
+        if_os_match = re.search(r'if OS', match.group(1))
+        endif_match = re.search(r'endif', match.group(1))
+        else_match = re.search(r'else', match.group(1))
+
+        # mangle positions in the (possibly already modified) line: right after the "{" and right before the "}"
+        pos_first_mangle = constr_match.start() + start_index + added_length + 1
+        pos_second_mangle = constr_match.end() + start_index + added_length - 1
+
+        # different parts of the current line around the two mangle positions
+        part_before_mangling = line[:pos_first_mangle]
+        part_between_mangling = line[pos_first_mangle:pos_second_mangle]
+        part_after_mangling = line[pos_second_mangle:]
+
+        # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these
+        if endif_match:
+            # an endif is only mangled when it closes an os-if, closing it restores the enclosing state
+            if is_os in (OS_IF, OS_IF_IN_OS_IF):
+                if options[VERBOSE]:
+                    print("OS-specific endif statement found in line: " + line[:-1])
+                line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling
+                added_length += 2 * len(IF_MANGLED_PART)
+                if is_os == OS_IF:
+                    is_os = NON_OS_IF
+                elif is_os == OS_IF_IN_OS_IF:
+                    is_os = OS_IF
+            elif is_os == NON_OS_IF_IN_OS_IF:
+                # the endif closes a non-os-if nested in an os-if: left untouched for jinja, back in the os-if
+                is_os = OS_IF
+
+        elif if_match:
+            if if_os_match:
+                # os-ifs are always mangled
+                if options[VERBOSE]:
+                    print("OS-specific if statement found in line:    " + line[:-1])
+                line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling
+                added_length += 2 * len(IF_MANGLED_PART)
+                if is_os == OS_IF:
+                    is_os = OS_IF_IN_OS_IF
+                else:
+                    is_os = OS_IF
+            else:
+                # non-os-ifs are left untouched, only the state is updated
+                if is_os == OS_IF:
+                    is_os = NON_OS_IF_IN_OS_IF
+                else:
+                    is_os = NON_OS_IF
+
+        elif else_match:
+            # an else is only mangled when it belongs to an os-if, the state doesn't change
+            if is_os in (OS_IF, OS_IF_IN_OS_IF):
+                if options[VERBOSE]:
+                    print("OS-specific else statement found in line:  " + line[:-1])
+                line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling
+                added_length += 2 * len(IF_MANGLED_PART)
+
+        # continue searching in the part of the line behind the construction that was just handled
+        start_index += constr_match.end()
+        match = re.search(r'\{%(.*?)%}(.*)', match.group(2))
+    return line, is_os
+
+
+def mangle_ifs(directory, filename, options):
+    """
+    function that writes the if-mangled version of a file to a location where the jinja parser will use it
+
+    :param directory: the location of the file to be if-mangled (this path is opened for reading)
+    :param filename: the filename used for the mangled file inside the IF_MANGLED_FILES directory
+    :param options: dictionary containing the options given by the user
+    :return:
+    """
+    # the scope of the latest if-statement is carried over from one line to the next
+    if_scope = NON_OS_IF
+
+    mangled_path = os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES,  filename)
+
+    with open(mangled_path, 'w') as mangled_file, open(directory, 'r') as original_file:
+        for original_line in original_file:
+            mangled_line, if_scope = mangle_os_ifs(original_line, if_scope, options)
+            mangled_file.write(mangled_line)
+
+
+def alter_ifs(filename, options):
+    """
+    Function that rewrites the remaining comparisons in the if-mangled version of a file, in place, so the jinja parser can handle them.
+    The jinja parser doesn't seem to be able to handle statements like {% if site == gent %} with context {'site': 'Gent'}.
+    The compared values get quoted and capitalized, so such a statement becomes {% if site == 'Gent' %}.
+    Values separated by " or " are quoted one by one.
+
+    :param filename: the filename of the file to be transformed (looked up in the IF_MANGLED_FILES directory)
+    :param options: dictionary containing the options given by the user
+    :return:
+    """
+
+    def quote_values(match):
+        # group(1) is everything up to the comparison operator, group(2) the compared value(s), group(3) the closing part
+        quoted_values = [f"'{value.strip().capitalize()}'" for value in match.group(2).split(" or ")]
+        return match.group(1) + " or ".join(quoted_values) + match.group(3)
+
+    mangled_path = os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES,  filename)
+    comparison_pattern = r'(\{%-?\s?[a-zA-Z\s]*?[!=]=\s?\(?)([a-zA-Z\s]+(?:\sor\s[a-zA-Z\s]+)*)(\)?\s?%})'
+
+    with open(mangled_path, 'r') as read_file:
+        original_content = read_file.read()
+
+    altered_content = re.sub(comparison_pattern, quote_values, original_content)
+
+    with open(mangled_path, 'w') as write_file:
+        write_file.write(altered_content)
+
+
+def make_valid_title(title):
+    """
+    function that turns a title into a string that can be used as a valid filename
+
+    :param title: the string that will be used as title and filename
+    :return valid_filename: the adapted title that can be used as filename
+    """
+    # drop the extra information between {} brackets
+    without_brackets = re.sub(r'\{.*?}', '', title)
+
+    # drop the characters that are invalid in filenames on Windows or Linux
+    without_invalid_chars = re.sub(r'[<>:"/\\|?*\0]', '', without_brackets)
+
+    # trim surrounding whitespace first and surrounding hyphens second
+    trimmed = without_invalid_chars.strip().strip('-')
+
+    # spaces become hyphens, afterwards pairs of hyphens are replaced by a single hyphen (one pass only)
+    hyphenated = trimmed.replace(' ', '-')
+    valid_filename = hyphenated.replace("--", "-")
+
+    return valid_filename
+
+
+def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, options, is_linux_tutorial):
+    """
+    Function that writes the text and the metadata of a generic (non-os-specific) section, sections without text are skipped
+
+    :param title: title of section
+    :param paragraphs_text: dictionary containing all paragraphs of text
+    :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text
+    :param title_order: list containing all subtitles in order
+    :param title_order_number: order number of the title of the section that is being written
+    :param options: dictionary containing the options given by the user
+    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
+    :return:
+    """
+
+    section_text = paragraphs_text[title]
+
+    # don't write empty files
+    if len(section_text) == 0:
+        return
+
+    # make the directory needed for the files that will be written
+    filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY])
+    os.makedirs(filepath, exist_ok=True)
+
+    if options[VERBOSE]:
+        print("Writing generic section " + title + " to filepath: " + str(filepath))
+
+    write_files(title, section_text, paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options, is_linux_tutorial)
+
+
+def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, options, is_linux_tutorial):
+    """
+    Function that writes a section to a certain filepath as a text file (<title>.txt) and a metadata file (<title> + METADATA_EXTENSION + .json)
+
+    :param title: title of the section to be written
+    :param text: section of text to be written
+    :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text (it is not modified, a copy is written)
+    :param title_order: list containing all subtitles in order
+    :param title_order_number: order number of the title of the section that is being written
+    :param filepath: filepath to write files to
+    :param OS: OS to be included in the metadata
+    :param options: dictionary containing the options given by the user
+    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
+    :return:
+    """
+
+    section_metadata = paragraphs_metadata[title]
+    metadata = copy.deepcopy(section_metadata)
+
+    # write text file, the link markers in the text are resolved first when the section has links
+    text_path = os.path.join(filepath, title + ".txt")
+    with open(text_path, 'w') as text_file:
+        if LINKS in section_metadata:
+            text, metadata[LINKS] = insert_links(text, metadata[LINKS], options)
+        text_file.write(text)
+
+    # an empty links entry is left out of the metadata
+    if LINKS in metadata and not metadata[LINKS]:
+        del metadata[LINKS]
+
+    # add previous and next subtitle (None at the beginning and the end of the title order)
+    is_first = title_order_number == 0
+    is_last = title_order_number == len(title_order) - 1
+    metadata[PREVIOUS_SUBTITLE] = None if is_first else title_order[title_order_number - 1]
+    metadata[NEXT_SUBTITLE] = None if is_last else title_order[title_order_number + 1]
+
+    # add OS
+    metadata[METADATA_OS] = OS
+
+    # add reference link
+    linux_part = (LINUX_TUTORIAL + "/") if is_linux_tutorial else ""
+    os_part = "" if OS == GENERIC else LINK_OS[OS] + "/"
+    main_title = section_metadata[MAIN_TITLE]
+    if "index" in main_title:
+        metadata[REFERENCE_LINK] = DOCS_URL
+    else:
+        # the anchor is the subtitle in lowercase, reduced to alphanumeric characters and hyphens
+        anchor = ''.join(char.lower() for char in section_metadata[SUBTITLE] if char.isalnum() or char == '-').strip('-')
+        metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + main_title + "/#" + anchor
+
+    # write metadata to file
+    metadata_path = os.path.join(filepath, title + METADATA_EXTENSION + ".json")
+    with open(metadata_path, 'w') as metadata_file:
+        json.dump(metadata, metadata_file, indent=4)
+
+
+def insert_links(text, links, options):
+    """
+    Function that inserts links in the plaintext or takes out the references to the links depending on the value of INCLUDE_LINKS_IN_PLAINTEXT
+
+    A reference to a link is the key of the link in the links dictionary surrounded by two LINK_MARKERs.
+
+    :param text: The plaintext that needs to be adapted
+    :param links: dictionary containing the links that might need to be inserted, keyed by their number (as a string)
+    :param options: dictionary containing the options given by the user
+    :return text: The adapted plaintext
+    :return links: The links that were actually present in the text, renumbered from "0" in their original order
+    :raises KeyError: if links need to be inserted and the text references a number that is not a key of links
+    """
+
+    present_links = []
+    for link_reference in re.finditer(LINK_MARKER + r'([0-9]*?)' + LINK_MARKER, text):
+        link_number = link_reference.group(1)
+        present_links.append(link_number)
+
+        if options[INCLUDE_LINKS_IN_PLAINTEXT]:
+            replacement = " " + links[link_number] + " "
+        else:
+            replacement = ""
+
+        # a function is used as replacement so the link is inserted literally, a replacement string would have its
+        # backslashes interpreted as escapes or group references by re.sub
+        text = re.sub(LINK_MARKER + link_number + LINK_MARKER, lambda _match, replacement=replacement: replacement, text)
+
+    # only keep the links that are referenced in the text and renumber them consecutively
+    referenced_links = [link for link_number, link in links.items() if link_number in present_links]
+    new_links = {str(new_number): link for new_number, link in enumerate(referenced_links)}
+
+    return text, new_links
+
+
+def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata, options, is_linux_tutorial):
+    """
+    Function that splits os-specific sections into subtitles, parses them using jinja and writes them away
+
+    For every OS (LINUX, WINDOWS, MACOS) the section is rendered with jinja, written to TEMP_JINJA_FILE, split again with split_text
+    and every resulting non-empty subsection is written to the directory of that OS together with its metadata.
+
+    :param text: full os specific section
+    :param metadata: metadata generated for the full os specific section
+    :param subtitle_order: order of the subtitles generated by the splitter
+    :param title_order_number: order number of the section
+    :param all_metadata: all metadata generated by the splitter (this dictionary is updated with the metadata of the os-specific subsections)
+    :param options: dictionary containing the options given by the user
+    :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial
+    :return:
+    """
+
+    # Unmangle if's to use jinja parser
+    text = re.sub(IF_MANGLED_PART, "", text)
+
+    for OS in [LINUX, WINDOWS, MACOS]:
+
+        # slightly alter if-statements to be able to use predefined macros
+        # (text is reassigned, so the names of the previously handled OSes stay quoted in the following iterations)
+        text = re.sub(OS, '"' + OS + '"', text)
+
+        # Use jinja to render a different version of the text for each OS
+        template = Template(text)
+        jinja_text = template.render(OS=OS)
+
+        if len(jinja_text) != 0:
+
+            # add first subtitle in front of section again
+            if options[SPLIT_ON_TITLES] or metadata[SUBTITLE] not in make_valid_title(jinja_text[:len(metadata[SUBTITLE]) + 1]):
+                jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE].replace("-", " ") + "\n" + jinja_text
+            else:
+                jinja_text = "#" * metadata[TITLE_DEPTH] + " " + jinja_text
+
+            # re-adjust text to correct overcorrections (only the quotes around the current OS are removed again)
+            jinja_text = re.sub('"' + OS + '"', OS, jinja_text)
+
+            with open(TEMP_JINJA_FILE, 'w') as writefile:
+                writefile.write(jinja_text)
+
+            # split in right way
+            _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, is_linux_tutorial, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS)
+
+            # prepare variables to fix metadata
+            # (the os-specific subtitles take the place of the original section in the order of all subtitles)
+            total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:]
+            all_metadata.update(os_specific_metadata)
+
+            # write to files
+            for os_i, os_subtitle in enumerate(os_subtitle_order):
+                # check that file actually has some content
+                if len(os_specific_text[os_subtitle]) > 0:
+                    # add the links to the metadata
+                    if LINKS in metadata.keys():
+                        os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS]
+
+                    # fix parent in the metadata
+                    parent_i = 0
+                    parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1
+                    parent = os_specific_metadata[os_subtitle][MAIN_TITLE]
+
+                    # the parent is the last section in front of this one with a title that is exactly one level higher
+                    # (the bound is checked before indexing so the search can never run past the end of the list)
+                    while parent_i < len(total_subtitle_order) and total_subtitle_order[parent_i] != os_subtitle:
+                        if all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth:
+                            parent = total_subtitle_order[parent_i]
+                        parent_i += 1
+
+                    if options[SPLIT_ON_PARAGRAPHS] and parent != os_specific_metadata[os_subtitle][MAIN_TITLE]:
+                        os_specific_metadata[os_subtitle][PARENT_TITLE] = all_metadata[parent][SUBTITLE]
+                    else:
+                        os_specific_metadata[os_subtitle][PARENT_TITLE] = parent
+
+                    # fix directory in the metadata if needed
+                    if options[DEEP_DIRECTORIES]:
+                        if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]:
+                            os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE])
+                        else:
+                            os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE])
+
+                    # make a directory to save the files
+                    filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY])
+                    os.makedirs(filepath, exist_ok=True)
+
+                    if options[VERBOSE]:
+                        print("Writing os-specific section " + os_subtitle + " to filepath: " + str(filepath))
+
+                    # write to files
+                    write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options, is_linux_tutorial)
+                else:
+                    # don't write empty files
+                    pass
+        else:
+            # don't split empty texts
+            pass
+
+
+def main(options):
+    """
+    main function: processes every markdown file that is located directly in the source directory and writes the resulting sections to the destination directory
+
+    :param options: dictionary containing the options specified by the user to run the script:
+                    {SOURCE_DIRECTORY: The source directory where the original files are located,
+                    DESTINATION_DIRECTORY: The destination directory where the processed files should be written to,
+                    SPLIT_ON_TITLES: boolean indicating whether to split on titles,
+                    SPLIT_ON_PARAGRAPHS: boolean indicating whether to split on paragraphs (should always be the opposite of SPLIT_ON_TITLES),
+                    MIN_PARAGRAPH_LENGTH: integer representing the minimum length of a paragraph,
+                    MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text,
+                    INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext,
+                    DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not,
+                    VERBOSE: enable or disable verbose mode}
+    :return:
+    """
+
+    def log(message):
+        # progress information is only printed in verbose mode
+        if options[VERBOSE]:
+            print(message)
+
+    log("Running chatbot parser with options: " + str(options))
+
+    if options[DEEP_DIRECTORIES]:
+        log("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.")
+
+    destination = options[DESTINATION_DIRECTORY]
+    copies_dir = os.path.join(destination, COPIES)
+    parsed_mds_dir = os.path.join(destination, PARSED_MDS)
+    if_mangled_dir = os.path.join(destination, IF_MANGLED_FILES)
+
+    # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason
+    for leftover_dir in (parsed_mds_dir, copies_dir, if_mangled_dir):
+        shutil.rmtree(leftover_dir, ignore_errors=True)
+
+    # make the necessary directories
+    for working_dir in (copies_dir, parsed_mds_dir, if_mangled_dir):
+        if not os.path.exists(working_dir):
+            os.makedirs(working_dir)
+
+    ################### define loop-invariant variables ###################
+
+    # constant that keeps track of the source directory
+    source_directory = options[SOURCE_DIRECTORY]
+
+    # all markdown files located directly in the source directory, mapped to their full path
+    filenames = {
+        entry: os.path.join(source_directory, entry)
+        for entry in os.listdir(source_directory)
+        if os.path.isfile(os.path.join(source_directory, entry)) and entry.endswith(".md")
+    }
+
+    # root directories of the generic and the os-specific output
+    root_dir_generic = os.path.join(parsed_mds_dir, GENERIC_DIR)
+    root_dir_os_specific = os.path.join(parsed_mds_dir, OS_SPECIFIC_DIR)
+    os_root_dirs = [os.path.join(root_dir_os_specific, operating_system) for operating_system in (LINUX, WINDOWS, MACOS)]
+
+    # for loops over all files
+    for filename, source_path in filenames.items():
+        ################### define/reset loop specific variables ###################
+
+        # boolean indicating whether the current file is part of the linux tutorial
+        is_linux_tutorial = LINUX_TUTORIAL in source_path
+
+        # make a copy of the original file in order to make sure the original does not get altered
+        copy_file = os.path.join(copies_dir, filename)
+        shutil.copyfile(source_path, copy_file)
+
+        # variable for the main title (needed for reference links), this is the filename without the .md extension
+        main_title = filename[:-3]
+
+        # variable that keeps track of the directories that are used to write in at different levels
+        curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)]
+
+        ################### actually parse the md file ###################
+
+        log(LINE + "Processing " + filename)
+        log("Location: " + source_path)
+        log("\nMaking directories:")
+
+        # create directories for the source markdown file
+        file_dirs = [os.path.join(root_dir, curr_dirs[0]) for root_dir in [root_dir_generic] + os_root_dirs]
+        for directory in [root_dir_generic, root_dir_os_specific] + os_root_dirs + file_dirs:
+            log(directory)
+            os.makedirs(directory, exist_ok=True)
+
+        log("\nParsing the sourcefile with jinja")
+
+        # process the jinja macros
+        jinja_parser(filename, copy_file, options)
+
+        log("\nSplitting the file for the first time (split in sufficiently small generic sections and large os-specific chunks)")
+
+        # split the text in paragraphs
+        paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options, is_linux_tutorial)
+
+        log("\nFurther splitting os-specific chunks and writing generic and os-specific sections to files with metadata")
+
+        # for every section, either make the whole section generic, or create an os-specific file for each OS
+        for order_number, subtitle in enumerate(subtitle_order):
+            if subtitle in paragraphs_os_free_text:
+                # generic
+                write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, order_number, options, is_linux_tutorial)
+            else:
+                # os-specific
+                split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, order_number, paragraphs_metadata, options, is_linux_tutorial)
+
+        log("\nFinished processing " + filename)
+
+    # clean up temporary directories and files
+    temporary_dirs = (copies_dir, if_mangled_dir, os.path.join(destination, LINUX_TUTORIAL))
+
+    log(LINE + "Cleaning up directories:")
+    for temporary_dir in temporary_dirs:
+        log(temporary_dir)
+
+    for temporary_dir in temporary_dirs:
+        shutil.rmtree(temporary_dir, ignore_errors=True)
+    if os.path.exists(TEMP_JINJA_FILE):
+        os.remove(TEMP_JINJA_FILE)
+
+    log("Parsing finished successfully")
+
+
+################### run the script ###################
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n")
+
+    # adding command-line options
+    parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located")
+    parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to")
+    parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.")
+    # the help text states the actual default (512) and the actual unit: paragraph_long_enough counts tokens, not characters
+    parser.add_argument("-pl", "--min_paragraph_length", type=int, default=512, help="Minimum length in tokens of a paragraph, only works if split on titles is disabled (default: 512)")
+    parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)")
+    parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts")
+    parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is enabled")
+    parser.add_argument("-v", "--verbose", action="store_true", help="Run the script with verbose output")
+
+    args = parser.parse_args()
+
+    # splitting on paragraphs is always the opposite of splitting on titles,
+    # deep directories are only enabled in combination with splitting on titles
+    options_dict = {SOURCE_DIRECTORY: args.source,
+                    DESTINATION_DIRECTORY: args.destination,
+                    SPLIT_ON_TITLES: args.split_on_titles,
+                    SPLIT_ON_PARAGRAPHS: not args.split_on_titles,
+                    MIN_PARAGRAPH_LENGTH: args.min_paragraph_length,
+                    MAX_TITLE_DEPTH: args.max_title_depth,
+                    INCLUDE_LINKS_IN_PLAINTEXT: args.links,
+                    DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles,
+                    VERBOSE: args.verbose}
+
+    main(options_dict)
diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt
new file mode 100644
index 00000000000..37137582aad
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/requirements.txt
@@ -0,0 +1,4 @@
+PyYAML==6.0.2
+Jinja2==3.1.4
+tiktoken~=0.7.0
+# pathlib is part of the Python standard library (>= 3.4); the PyPI "pathlib" backport must not be installed
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt
new file mode 100644
index 00000000000..94270ff37e3
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt
@@ -0,0 +1,6 @@
+Main title
+This is the first paragraph of text. It is non-os-specific, however it does contain a link.
+It also contains some other Markdown syntax and an
+example code block.
+This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum
+character limit for a section).
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json
new file mode 100644
index 00000000000..08c0b4e4973
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "tps1",
+    "subtitle": "Main-title",
+    "source_file": "tests/test_files/ftps/tps1.md",
+    "title_depth": 1,
+    "directory": "tps1",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/generic"
+    },
+    "parent_title": "",
+    "previous_title": null,
+    "next_title": "tps1_paragraph_2",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/tps1/#main-title"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt
new file mode 100644
index 00000000000..58eedc06aa0
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt
@@ -0,0 +1,3 @@
+Conclusion
+Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I 
+might add to this if needed.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json
new file mode 100644
index 00000000000..2f1ea4dcd1f
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json
@@ -0,0 +1,12 @@
+{
+    "main_title": "tps1",
+    "subtitle": "Conclusion",
+    "source_file": "tests/test_files/ftps/tps1.md",
+    "title_depth": 2,
+    "directory": "tps1",
+    "parent_title": "",
+    "previous_title": "tps1_paragraph_2",
+    "next_title": null,
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/tps1/#conclusion"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt
new file mode 100644
index 00000000000..d0ee9ce8256
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt
@@ -0,0 +1,4 @@
+OS specific sections
+This is the second section, it is the start of some 
+text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will
+still add a link.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json
new file mode 100644
index 00000000000..208cb3472f4
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "tps1",
+    "subtitle": "OS-specific-sections",
+    "source_file": "tests/test_files/ftps/tps1.md",
+    "title_depth": 2,
+    "directory": "tps1",
+    "parent_title": "Main-title",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/linuxmacos"
+    },
+    "previous_title": "tps1_paragraph_1",
+    "next_title": "tps1_linux_paragraph_2.2",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#os-specific-sections"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt
new file mode 100644
index 00000000000..1a3867e69fa
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt
@@ -0,0 +1,3 @@
+Non Windows section
+Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise
+section that ends right here.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json
new file mode 100644
index 00000000000..b975dfe4e03
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json
@@ -0,0 +1,12 @@
+{
+    "main_title": "tps1",
+    "subtitle": "Non-Windows-section",
+    "source_file": "tests/test_files/ftps/tps1.md",
+    "title_depth": 3,
+    "directory": "tps1",
+    "parent_title": "OS-specific-sections",
+    "previous_title": "tps1_linux_paragraph_2.1",
+    "next_title": "tps1_paragraph_3",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#non-windows-section"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt
new file mode 100644
index 00000000000..e0642d6ac96
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt
@@ -0,0 +1,4 @@
+OS specific sections
+This is the second section, it is the start of some 
+text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will
+still add a link.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json
new file mode 100644
index 00000000000..9c605eb9004
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "tps1",
+    "subtitle": "OS-specific-sections",
+    "source_file": "tests/test_files/ftps/tps1.md",
+    "title_depth": 2,
+    "directory": "tps1",
+    "parent_title": "Main-title",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/linuxmacos"
+    },
+    "previous_title": "tps1_paragraph_1",
+    "next_title": "tps1_macos_paragraph_2.2",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#os-specific-sections"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt
new file mode 100644
index 00000000000..1a3867e69fa
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt
@@ -0,0 +1,3 @@
+Non Windows section
+Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise
+section that ends right here.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json
new file mode 100644
index 00000000000..e3ca81d7cc5
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json
@@ -0,0 +1,12 @@
+{
+    "main_title": "tps1",
+    "subtitle": "Non-Windows-section",
+    "source_file": "tests/test_files/ftps/tps1.md",
+    "title_depth": 3,
+    "directory": "tps1",
+    "parent_title": "OS-specific-sections",
+    "previous_title": "tps1_macos_paragraph_2.1",
+    "next_title": "tps1_paragraph_3",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#non-windows-section"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt
new file mode 100644
index 00000000000..9a9cbe1f3d2
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt
@@ -0,0 +1,7 @@
+OS specific sections
+This is the second section, it is the start of some  text specific to windows.
+In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer 
+to make sure we get a long section that is over the minimum required length for the next newline character to be 
+classified as the end of this section. I am doing this because for the next sections I want to test whether they will be
+grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, 
+let's add a link in this section as well.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json
new file mode 100644
index 00000000000..ab58c622b8c
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "tps1",
+    "subtitle": "OS-specific-sections",
+    "source_file": "tests/test_files/ftps/tps1.md",
+    "title_depth": 2,
+    "directory": "tps1",
+    "parent_title": "Main-title",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/windows"
+    },
+    "previous_title": "tps1_paragraph_1",
+    "next_title": "tps1_windows_paragraph_2.2",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#os-specific-sections"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt
new file mode 100644
index 00000000000..6b57235f68f
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt
@@ -0,0 +1,6 @@
+Windows specific section
+Like this.
+And this.
+And also this.
+These section should all be grouped together under the windows specific section of the output. The addition of this long
+section at the end should make sure the combination of sections comes to an end here.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json
new file mode 100644
index 00000000000..435c9e9c484
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json
@@ -0,0 +1,12 @@
+{
+    "main_title": "tps1",
+    "subtitle": "Windows-specific-section",
+    "source_file": "tests/test_files/ftps/tps1.md",
+    "title_depth": 3,
+    "directory": "tps1",
+    "parent_title": "OS-specific-sections",
+    "previous_title": "tps1_windows_paragraph_2.1",
+    "next_title": "tps1_paragraph_3",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#windows-specific-section"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md
new file mode 100644
index 00000000000..d9b10d0c524
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md
@@ -0,0 +1,43 @@
+# Main title
+
+This is the first paragraph of text. It is non-os-specific, however it does contain [a link](generic.md).
+It also contains some `other` *Markdown* _syntax_ and an
+```shell
+example code block.
+```
+This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum
+character limit for a section).
+
+## OS specific sections
+
+This is the second section, it is the start of some {% if OS == windows %} text specific to windows.
+In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer 
+to make sure we get a long section that is over the minimum required length for the next newline character to be 
+classified as the end of this section. I am doing this because for the next sections I want to test whether they will be
+grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, 
+let's add [a link](windows.md) in this section as well.
+
+### Windows specific section
+
+Like this.
+
+And this.
+
+And also this.
+
+These section should all be grouped together under the windows specific section of the output. The addition of this long
+section at the end should make sure the combination of sections comes to an end here.
+{% else %}
+text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will
+still add [a link](linuxmacos.md).
+
+### Non Windows section
+
+Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise
+section that ends right here.
+{% endif %}
+
+## Conclusion
+
+Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I 
+might add to this if needed.
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt
new file mode 100644
index 00000000000..f62a4f31fee
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt
@@ -0,0 +1,2 @@
+blablabla
+blablablabla
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json
new file mode 100644
index 00000000000..b7786c066a7
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json
@@ -0,0 +1,12 @@
+{
+    "main_title": "tts1",
+    "subtitle": "Subtitle-1",
+    "source_file": "tests/test_files/ftts/tts1.md",
+    "title_depth": 2,
+    "directory": "tts1\\Main-title\\Subtitle-1",
+    "parent_title": "Main-title",
+    "previous_title": "Main-title",
+    "next_title": "Subtitle-2-g",
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/tts1/#subtitle-1"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt
new file mode 100644
index 00000000000..bdf68551202
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt
@@ -0,0 +1 @@
+blablabla
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json
new file mode 100644
index 00000000000..eb5403804e2
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json
@@ -0,0 +1,12 @@
+{
+    "main_title": "tts1",
+    "subtitle": "Subtitle-5-g",
+    "source_file": "tests/test_files/ftts/tts1.md",
+    "title_depth": 2,
+    "directory": "tts1\\Main-title\\Subtitle-5-g",
+    "parent_title": "Main-title",
+    "previous_title": "Subtitle-2-g",
+    "next_title": null,
+    "OS": "generic",
+    "reference_link": "https://docs.hpc.ugent.be/tts1/#subtitle-5-g"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
new file mode 100644
index 00000000000..48125d91679
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
@@ -0,0 +1,4 @@
+blablabla generic
+blablabla generic
+blablabla Linux macOS
+blablablabla Linux macOS with a link
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
new file mode 100644
index 00000000000..f7330bec86d
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "tts1",
+    "subtitle": "Subtitle-2-g",
+    "source_file": "tests/test_files/ftts/tts1.md",
+    "title_depth": 2,
+    "directory": "tts1\\Main-title\\Subtitle-2-g",
+    "parent_title": "Main-title",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/linuxmacos"
+    },
+    "previous_title": "Subtitle-1",
+    "next_title": "Subtitle-4-l&m",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/tts1/#subtitle-2-g"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
new file mode 100644
index 00000000000..b221f26074b
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
@@ -0,0 +1,3 @@
+blablabla Linux macOS
+blablablabla Linux macOS
+blablabla generic with a link
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
new file mode 100644
index 00000000000..a76f852c874
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "tts1",
+    "subtitle": "Subtitle-4-l&m",
+    "source_file": "tests/test_files/ftts/tts1.md",
+    "title_depth": 3,
+    "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m",
+    "parent_title": "Subtitle-2-g",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/generic"
+    },
+    "previous_title": "Subtitle-2-g",
+    "next_title": "Subtitle-5-g",
+    "OS": "linux",
+    "reference_link": "https://docs.hpc.ugent.be/Linux/tts1/#subtitle-4-lm"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
new file mode 100644
index 00000000000..48125d91679
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
@@ -0,0 +1,4 @@
+blablabla generic
+blablabla generic
+blablabla Linux macOS
+blablablabla Linux macOS with a link
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
new file mode 100644
index 00000000000..8b234c92fa6
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "tts1",
+    "subtitle": "Subtitle-2-g",
+    "source_file": "tests/test_files/ftts/tts1.md",
+    "title_depth": 2,
+    "directory": "tts1\\Main-title\\Subtitle-2-g",
+    "parent_title": "Main-title",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/linuxmacos"
+    },
+    "previous_title": "Subtitle-1",
+    "next_title": "Subtitle-4-l&m",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/tts1/#subtitle-2-g"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
new file mode 100644
index 00000000000..b221f26074b
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt
@@ -0,0 +1,3 @@
+blablabla Linux macOS
+blablablabla Linux macOS
+blablabla generic with a link
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
new file mode 100644
index 00000000000..732d309da81
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "tts1",
+    "subtitle": "Subtitle-4-l&m",
+    "source_file": "tests/test_files/ftts/tts1.md",
+    "title_depth": 3,
+    "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m",
+    "parent_title": "Subtitle-2-g",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/generic"
+    },
+    "previous_title": "Subtitle-2-g",
+    "next_title": "Subtitle-5-g",
+    "OS": "macos",
+    "reference_link": "https://docs.hpc.ugent.be/macOS/tts1/#subtitle-4-lm"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
new file mode 100644
index 00000000000..f9f20592832
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt
@@ -0,0 +1,4 @@
+blablabla generic
+blablabla generic
+blablabla windows
+blablabla windows with a link
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
new file mode 100644
index 00000000000..7a43426a85f
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "tts1",
+    "subtitle": "Subtitle-2-g",
+    "source_file": "tests/test_files/ftts/tts1.md",
+    "title_depth": 2,
+    "directory": "tts1\\Main-title\\Subtitle-2-g",
+    "parent_title": "Main-title",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/windows"
+    },
+    "previous_title": "Subtitle-1",
+    "next_title": "Subtitle-3-w",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/tts1/#subtitle-2-g"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt
new file mode 100644
index 00000000000..0b587cef85a
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt
@@ -0,0 +1,3 @@
+blablabla windows
+blablablabla windows
+blablabla generic with a link
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json
new file mode 100644
index 00000000000..4d7f494320d
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json
@@ -0,0 +1,15 @@
+{
+    "main_title": "tts1",
+    "subtitle": "Subtitle-3-w",
+    "source_file": "tests/test_files/ftts/tts1.md",
+    "title_depth": 3,
+    "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-3-w",
+    "parent_title": "Subtitle-2-g",
+    "links": {
+        "0": "https://docs.hpc.ugent.be/generic"
+    },
+    "previous_title": "Subtitle-2-g",
+    "next_title": "Subtitle-5-g",
+    "OS": "windows",
+    "reference_link": "https://docs.hpc.ugent.be/Windows/tts1/#subtitle-3-w"
+}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md
new file mode 100644
index 00000000000..2f3ad7f9c08
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md
@@ -0,0 +1,31 @@
+# Main title
+
+## Subtitle 1
+
+blablabla
+blablablabla
+
+## Subtitle 2 g
+
+blablabla generic
+blablabla generic
+{% if OS == windows %}blablabla windows
+blablabla windows with a [link](windows.md)
+
+### Subtitle 3 w
+
+blablabla windows
+blablablabla windows
+{% else %}blablabla Linux macOS
+blablablabla Linux macOS with a [link](linuxmacos.md)
+
+### Subtitle 4 l&m
+
+blablabla Linux macOS
+blablablabla Linux macOS
+{% endif %}
+blablabla generic with a [link](generic.md)
+
+## Subtitle 5 g
+
+blablabla
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md
new file mode 100644
index 00000000000..6a74b3c0181
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md
@@ -0,0 +1,4 @@
+test1: OS_IF
+{% if OS == windows %}
+test1
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md
new file mode 100644
index 00000000000..2f9cdc38294
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md
@@ -0,0 +1,4 @@
+test1: OS_IF
+{-if-% if OS == windows %-if-}
+test1
+{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md
new file mode 100644
index 00000000000..360a4a59ba3
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md
@@ -0,0 +1,7 @@
+test2: OS_IF in NON_OS_IF
+{% if site == Gent %}
+test2
+{% if OS == windows %}
+test2
+{% endif %}
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md
new file mode 100644
index 00000000000..798dcf6db24
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md
@@ -0,0 +1,7 @@
+test2: OS_IF in NON_OS_IF
+{% if site == Gent %}
+test2
+{-if-% if OS == windows %-if-}
+test2
+{-if-% endif %-if-}
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md
new file mode 100644
index 00000000000..d93125a5971
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md
@@ -0,0 +1,6 @@
+test3: OS_IF with else
+{% if OS == linux %}
+test3
+{% else %}
+test3
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md
new file mode 100644
index 00000000000..02141961338
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md
@@ -0,0 +1,6 @@
+test3: OS_IF with else
+{-if-% if OS == linux %-if-}
+test3
+{-if-% else %-if-}
+test3
+{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md
new file mode 100644
index 00000000000..cc15fae1df1
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md
@@ -0,0 +1,4 @@
+test4: OS_IF with wrong syntax
+{ if OS == macos }
+test4
+{ endif }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md
new file mode 100644
index 00000000000..cc15fae1df1
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md
@@ -0,0 +1,4 @@
+test4: OS_IF with wrong syntax
+{ if OS == macos }
+test4
+{ endif }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md
new file mode 100644
index 00000000000..bdb288474e2
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md
@@ -0,0 +1,11 @@
+test5: OS_IF in OS_IF
+{% if OS == windows %}
+test5
+{% else %}
+{% if OS == linux %}
+test5
+{% else %}
+test5
+{% endif %}
+test5
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md
new file mode 100644
index 00000000000..10443eb67a4
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md
@@ -0,0 +1,11 @@
+test5: OS_IF in OS_IF
+{-if-% if OS == windows %-if-}
+test5
+{-if-% else %-if-}
+{-if-% if OS == linux %-if-}
+test5
+{-if-% else %-if-}
+test5
+{-if-% endif %-if-}
+test5
+{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md
new file mode 100644
index 00000000000..0731ee3588c
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md
@@ -0,0 +1,8 @@
+test6: NON_OS_IF in OS_IF
+{% if OS == macos %}
+test6
+{% if site == Gent %}
+test6
+{% endif %}
+test6
+{% endif %}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md
new file mode 100644
index 00000000000..cd37117cb00
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md
@@ -0,0 +1,8 @@
+test6: NON_OS_IF in OS_IF
+{-if-% if OS == macos %-if-}
+test6
+{% if site == Gent %}
+test6
+{% endif %}
+test6
+{-if-% endif %-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md
new file mode 100644
index 00000000000..6a72a338527
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md
@@ -0,0 +1,9 @@
+test7: weird spacing and dashes
+	{%if OS == windows %}
+	test7
+{%- else%}
+	test7
+		{% if OS == linux%}
+test7
+	{%-endif %}
+{%endif%}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md
new file mode 100644
index 00000000000..dfe342ebfb1
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md
@@ -0,0 +1,9 @@
+test7: weird spacing and dashes
+	{-if-%if OS == windows %-if-}
+	test7
+{-if-%- else%-if-}
+	test7
+		{-if-% if OS == linux%-if-}
+test7
+	{-if-%-endif %-if-}
+{-if-%endif%-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md
new file mode 100644
index 00000000000..fb8c1f8b539
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md
@@ -0,0 +1,55 @@
+test1: OS_IF
+{% if OS == windows %}
+test1
+{% endif %}
+
+test2: OS_IF in NON_OS_IF
+{% if site == Gent %}
+test2
+{% if OS == windows %}
+test2
+{% endif %}
+{% endif %}
+
+test3: OS_IF with else
+{% if OS == linux %}
+test3
+{% else %}
+test3
+{% endif %}
+
+test4: OS_IF with wrong syntax
+{ if OS == macos }
+test4
+{ endif }
+
+test5: OS_IF in OS_IF
+{% if OS == windows %}
+test5
+{% else %}
+{% if OS == linux %}
+test5
+{% else %}
+test5
+{% endif %}
+test5
+{% endif %}
+
+test6: NON_OS_IF in OS_IF
+{% if OS == macos %}
+test6
+{% if site == Gent %}
+test6
+{% endif %}
+test6
+{% endif %}
+
+test7: weird spacing and dashes
+	{%if OS == windows %}
+	test7
+{%- else%}
+	test7
+		{% if OS == linux%}
+test7
+	{%-endif %}
+{%endif%}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md
new file mode 100644
index 00000000000..796e94348fa
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md
@@ -0,0 +1,55 @@
+test1: OS_IF
+{-if-% if OS == windows %-if-}
+test1
+{-if-% endif %-if-}
+
+test2: OS_IF in NON_OS_IF
+{% if site == Gent %}
+test2
+{-if-% if OS == windows %-if-}
+test2
+{-if-% endif %-if-}
+{% endif %}
+
+test3: OS_IF with else
+{-if-% if OS == linux %-if-}
+test3
+{-if-% else %-if-}
+test3
+{-if-% endif %-if-}
+
+test4: OS_IF with wrong syntax
+{ if OS == macos }
+test4
+{ endif }
+
+test5: OS_IF in OS_IF
+{-if-% if OS == windows %-if-}
+test5
+{-if-% else %-if-}
+{-if-% if OS == linux %-if-}
+test5
+{-if-% else %-if-}
+test5
+{-if-% endif %-if-}
+test5
+{-if-% endif %-if-}
+
+test6: NON_OS_IF in OS_IF
+{-if-% if OS == macos %-if-}
+test6
+{% if site == Gent %}
+test6
+{% endif %}
+test6
+{-if-% endif %-if-}
+
+test7: weird spacing and dashes
+	{-if-%if OS == windows %-if-}
+	test7
+{-if-%- else%-if-}
+	test7
+		{-if-% if OS == linux%-if-}
+test7
+	{-if-%-endif %-if-}
+{-if-%endif%-if-}
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md
new file mode 100644
index 00000000000..1e18a1495d5
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md
@@ -0,0 +1,15 @@
+# Title
+
+Some explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.
+
+1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list.
+
+2. Second entry
+
+3. Third entry
+
+    ![image](img/an_image_for_the_third_entry.png)
+
+4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.
+
+And now the text continues like normal in a new section.
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py
new file mode 100644
index 00000000000..91605dec651
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py
@@ -0,0 +1,68 @@
+import pytest
+import os
+import shutil
+from chatbot_parser import main
+
+
+@pytest.mark.parametrize("input_directory,actual_output_directory,expected_output_directory, options", [
+    ("tests/test_files/ftps", "tests/test_files/ftps/actual",
+     "tests/test_files/ftps/output",
+     {"SOURCE_DIRECTORY": "tests/test_files/ftps",
+      "DESTINATION_DIRECTORY": "tests/test_files/ftps/actual",
+      "SPLIT_ON_TITLES": False,
+      "SPLIT_ON_PARAGRAPHS": True,
+      "MIN_PARAGRAPH_LENGTH": 160,
+      "MAX_TITLE_DEPTH": 4,
+      "INCLUDE_LINKS_IN_PLAINTEXT": False,
+      "DEEP_DIRECTORIES": False,
+      "VERBOSE": False}
+     ),
+    ("tests/test_files/ftts", "tests/test_files/ftts/actual",
+     "tests/test_files/ftts/output",
+     {"SOURCE_DIRECTORY": "tests/test_files/ftts",
+      "DESTINATION_DIRECTORY": "tests/test_files/ftts/actual",
+      "SPLIT_ON_TITLES": True,
+      "SPLIT_ON_PARAGRAPHS": False,
+      "MIN_PARAGRAPH_LENGTH": 160,
+      "MAX_TITLE_DEPTH": 4,
+      "INCLUDE_LINKS_IN_PLAINTEXT": False,
+      "DEEP_DIRECTORIES": True,
+      "VERBOSE": False}
+     )
+])
+def test_full_script_generated_directories(input_directory, actual_output_directory, expected_output_directory, options):
+    """Run the parser with ``options`` and compare the generated tree with the expected tree."""
+    try:
+        # run the script
+        main(options)
+
+        # Compare directories and files
+        for dirpath, dirnames, filenames in os.walk(expected_output_directory):
+            relative_path = os.path.relpath(dirpath, expected_output_directory)
+            actual_dir = os.path.join(actual_output_directory, relative_path)
+            # Check if the directory exists
+            assert os.path.isdir(actual_dir), f"Directory '{actual_dir}' is missing."
+
+            # Check for files
+            for filename in filenames:
+                ref_file = os.path.join(dirpath, filename)
+                gen_file = os.path.join(actual_dir, filename)
+
+                # Check if the file exists
+                assert os.path.isfile(gen_file), f"File '{gen_file}' is missing."
+
+                # Check file content
+                with open(ref_file, 'r') as ref_f, open(gen_file, 'r') as gen_f:
+                    ref_content = ref_f.read().strip()
+                    gen_content = gen_f.read().strip()
+                    assert ref_content == gen_content, f"Content of file '{gen_file}' does not match."
+
+        # check that not too many directories have been generated
+        for dirpath, dirnames, filenames in os.walk(actual_output_directory):
+            relative_path = os.path.relpath(dirpath, actual_output_directory)
+            expected_dir = os.path.join(expected_output_directory, relative_path)
+            # Check if the directory exists
+            assert os.path.isdir(expected_dir), f"Directory '{relative_path}' was made, but shouldn't have been."
+    finally:
+        # remove directory, even when an assertion above failed, so no stale output survives
+        shutil.rmtree(actual_output_directory, ignore_errors=True)
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
new file mode 100644
index 00000000000..4d0dd876103
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
@@ -0,0 +1,32 @@
+import pytest
+import os
+import shutil
+from chatbot_parser import mangle_ifs
+
+
+@pytest.mark.parametrize("input_file,output_file", [
+    ("if_mangler_1_input.md", "if_mangler_1_output.md"),
+    ("if_mangler_2_input.md", "if_mangler_2_output.md"),
+    ("if_mangler_3_input.md", "if_mangler_3_output.md"),
+    ("if_mangler_4_input.md", "if_mangler_4_output.md"),
+    ("if_mangler_5_input.md", "if_mangler_5_output.md"),
+    ("if_mangler_6_input.md", "if_mangler_6_output.md"),
+    ("if_mangler_7_input.md", "if_mangler_7_output.md")
+])
+def test_if_mangler(input_file, output_file):
+    """Mangle ``input_file`` with ``mangle_ifs`` and compare the result with ``output_file``."""
+    os.makedirs(os.path.join("if_mangled_files"), exist_ok=True)
+
+    # make filepaths
+    input_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", input_file)
+    expected_output_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", output_file)
+    actual_output_file_path = os.path.join("if_mangled_files", input_file)
+    mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.'})
+
+    # compare whole line lists; zip() stops at the shorter file, so truncated output used to pass
+    with open(expected_output_file_path, "r") as expected_read_file:
+        with open(actual_output_file_path, "r") as actual_read_file:
+            assert actual_read_file.readlines() == expected_read_file.readlines()
+
+    # remove directory
+    shutil.rmtree("if_mangled_files", ignore_errors=True)
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py b/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py
new file mode 100644
index 00000000000..9109f2518ad
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py
@@ -0,0 +1,31 @@
+import pytest
+from chatbot_parser import insert_links
+
+options_include = {"INCLUDE_LINKS_IN_PLAINTEXT": True}
+options_leave_out = {"INCLUDE_LINKS_IN_PLAINTEXT": False}
+links_input = {"0": "https://first_link.com", "1": "https://second_link.be", "2": "https://docs.hpc.ugent.be/account#welcome-e-mail", "3": "https://final-link.org"}
+
+
+@pytest.mark.parametrize("text_input, options_input, text_output, new_links", [
+    # Text without links
+    # don't include links
+    ("Text without links\nand with two lines.", options_leave_out, "Text without links\nand with two lines.", {}),
+    # include links
+    ("Text without links\nand with two lines.", options_include, "Text without links\nand with two lines.", {}),
+    # Text with all links
+    # don't include links
+    ("Text with all the links\nand with multiple lines.\n§link§link§0§link§link§\n§link§link§1§link§link§\n§link§link§2§link§link§\n§link§link§3§link§link§", options_leave_out,
+     "Text with all the links\nand with multiple lines.\n\n\n\n", links_input),
+    # include links
+    ("Text with all the links\nand with multiple lines.\n§link§link§0§link§link§\n§link§link§1§link§link§\n§link§link§2§link§link§\n§link§link§3§link§link§", options_include,
+     "Text with all the links\nand with multiple lines.\n https://first_link.com \n https://second_link.be \n https://docs.hpc.ugent.be/account#welcome-e-mail \n https://final-link.org ", links_input),
+    # Text with some links
+    # don't include links
+    ("Text with all the links\nand with multiple lines.\n§link§link§1§link§link§\n§link§link§3§link§link§", options_leave_out,
+     "Text with all the links\nand with multiple lines.\n\n", {"0": "https://second_link.be", "1": "https://final-link.org"}),
+    # include links
+    ("Text with all the links\nand with multiple lines.\n§link§link§0§link§link§\n§link§link§2§link§link§", options_include,
+     "Text with all the links\nand with multiple lines.\n https://first_link.com \n https://docs.hpc.ugent.be/account#welcome-e-mail ", {"0": "https://first_link.com", "1": "https://docs.hpc.ugent.be/account#welcome-e-mail"})
+])
+def test_insert_links(text_input, options_input, text_output, new_links):
+    assert insert_links(text_input, links_input, options_input) == (text_output, new_links)
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_links.py b/scripts/HPC_chatbot_preprocessor/tests/test_links.py
new file mode 100644
index 00000000000..d1acca1d740
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_links.py
@@ -0,0 +1,69 @@
+import os
+import pytest
+from urllib import request
+from chatbot_parser import main
+import json
+
+whitelist = ["mailto:hpc@ugent.be"]
+slow_list = ["https://login.hpc.ugent.be", "https://www.edx.org/course/introduction-linux-linuxfoundationx-lfs101x-0"]
+
+options_general = {"SOURCE_DIRECTORY": "../../mkdocs/docs/HPC",
+                   "DESTINATION_DIRECTORY": ".",
+                   "SPLIT_ON_TITLES": False,
+                   "SPLIT_ON_PARAGRAPHS": True,
+                   "MIN_PARAGRAPH_LENGTH": 683,
+                   "MAX_TITLE_DEPTH": 4,
+                   "INCLUDE_LINKS_IN_PLAINTEXT": False,
+                   "DEEP_DIRECTORIES": False,
+                   "VERBOSE": False}
+options_os_specific = {"SOURCE_DIRECTORY": "../../mkdocs/docs/HPC/linux-tutorial",
+                       "DESTINATION_DIRECTORY": "./linux-tutorial",
+                       "SPLIT_ON_TITLES": False,
+                       "SPLIT_ON_PARAGRAPHS": True,
+                       "MIN_PARAGRAPH_LENGTH": 683,
+                       "MAX_TITLE_DEPTH": 4,
+                       "INCLUDE_LINKS_IN_PLAINTEXT": False,
+                       "DEEP_DIRECTORIES": False,
+                       "VERBOSE": False}
+
+
+@pytest.mark.parametrize("options", [options_general, options_os_specific])
+def test_all_links(options):
+    """Parse the docs with ``options`` and fail on any empty or unreachable link in the metadata."""
+    main(options)
+    all_links, broken_links, empty_links = {}, {}, {}
+
+    # gather the links stored in every generated *metadata.json file
+    # NOTE(review): keyed by bare filename, so equally named files in different directories overwrite each other
+    for dirpath, _dirnames, filenames in os.walk(os.path.join(options['DESTINATION_DIRECTORY'], 'parsed_mds')):
+        for filename in filenames:
+            all_links[filename] = []
+            if filename.endswith('metadata.json'):
+                # use a context manager so the metadata file is closed again
+                with open(os.path.join(dirpath, filename)) as metadata_file:
+                    data = json.load(metadata_file)
+                all_links[filename].extend(data.get('links', {}).values())
+                # the reference link is checked without its #anchor
+                all_links[filename].append(data['reference_link'].split("#")[0])
+
+    for filename, links in all_links.items():
+        # every distinct link is requested only once per file
+        for link in set(links):
+            if not link:
+                print("Empty link in " + filename)
+                empty_links.setdefault(filename, []).append(link)
+                continue
+            if link in whitelist or link in slow_list:
+                # whitelisted and known-slow links are never requested
+                continue
+
+            try:
+                # urlopen raises for HTTP error statuses, so returning at all counts as reachable
+                with request.urlopen(link):
+                    pass
+            except Exception:  # not a bare except, so Ctrl-C / SystemExit still abort the run
+                print("Broken link in " + filename + ": " + link)
+                broken_links.setdefault(filename, []).append(link)
+
+    assert not empty_links, f"Empty links found: {empty_links}"
+    assert not broken_links, f"Broken links found: {broken_links}"
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py
new file mode 100644
index 00000000000..06e56a5cb2c
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py
@@ -0,0 +1,27 @@
+import pytest
+from chatbot_parser import split_on_paragraphs
+
+
+@pytest.mark.parametrize("file, main_title, options, is_linux_tutorial, expected_text", [
+    ("./test_files/list_file/list_test.md",
+     "list_test.md",
+     {
+         "SOURCE_DIRECTORY": "./test_files/list_file",
+         "DESTINATION_DIRECTORY": "./test_files/list_file",
+         "SPLIT_ON_TITLES": False,
+         "SPLIT_ON_PARAGRAPHS": True,
+         "MIN_PARAGRAPH_LENGTH": 100,
+         "MAX_TITLE_DEPTH": 4,
+         "INCLUDE_LINKS_IN_PLAINTEXT": False,
+         "DEEP_DIRECTORIES": False,
+         "VERBOSE": False
+     },
+     False,
+     {
+         'list_test.md_paragraph_1': "Title\nSome explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.\n1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list.\n2. Second entry\n3. Third entry\n4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.\n",
+         'list_test.md_paragraph_2': 'And now the text continues like normal in a new section.'
+     }
+     )
+])
+def test_links(file, main_title, options, is_linux_tutorial, expected_text):
+    assert split_on_paragraphs(file, main_title, options, is_linux_tutorial)[1] == expected_text
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
new file mode 100644
index 00000000000..225c368477d
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py
@@ -0,0 +1,14 @@
+import pytest
+from chatbot_parser import make_valid_title
+
+
+@pytest.mark.parametrize("input_string,expected", [
+    ("", ""),
+    ("A-good-filename-with-dashes", "A-good-filename-with-dashes"),
+    (" A very good filename beginning and ending in a space ", "A-very-good-filename-beginning-and-ending-in-a-space"),
+    ("-A-very-good-filename-beginning-and-ending-in-a-dash-", "A-very-good-filename-beginning-and-ending-in-a-dash"),
+    ("A filename containing bad characters <>:\"/\\|?*\0", "A-filename-containing-bad-characters"),
+    ("A filename ending with {some jinja garbage}", "A-filename-ending-with")
+])
+def test_make_valid_title(input_string, expected):
+    assert make_valid_title(input_string) == expected
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py
new file mode 100644
index 00000000000..f4cee6dd75c
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py
@@ -0,0 +1,46 @@
+import pytest
+from chatbot_parser import replace_markdown_markers
+
+
+@pytest.mark.parametrize("input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist", [
+    # baseline test
+    ("A normal line with nothing special", [], False, "", "A normal line with nothing special", []),
+    # image 1
+    ("![image](a-nice-image.png)", [], False, "", "", []),
+    # image 2
+    ("![](img/Look-at-this-photograph.png)", [], False, "", "", []),
+    # link 1 (outside docs)
+    ("A line with a [link](a-nice-link.com)", ["another-link.be"], False, "",
+     "A line with a link§link§link§1§link§link§", ["another-link.be", "a-nice-link.com"]),
+    # link 2 (another document within the docs)
+    ("A line with a [link to the docs](account.md#welcome-e-mail)", ["another-link.be"], False, "",
+     "A line with a link to the docs§link§link§1§link§link§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]),
+    # link 3 (the same document)
+    ("A line with a [link to the same doc](#welcome-e-mail)", ["another-link.be"], False, "account.md",
+     "A line with a link to the same doc§link§link§1§link§link§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]),
+    # codeblock
+    ("```shell", [], True, "", "", []),
+    # html syntax 1 (normal syntax)
+    ("A line with something in <b>Bold</b>", [], False, "", "A line with something in Bold", []),
+    # html syntax 2 (link)
+    ("A line with another link<a href=website.com>", ["other-website.com"], False, "",
+     "A line with another link§link§link§1§link§link§", ["other-website.com", "website.com"]),
+    # html syntax 3 (style)
+    ("<p style='text-align: center'>A line with style</p>", [], False, "", "A line with style", []),
+    # Bot comment
+    ("<!--INPUT_FOR_BOTSomething about the following table-->", [], False, "", "Something about the following table", []),
+    # non-Bot comment
+    ("<!--Something else about the following table-->", [], False, "", "", []),
+    # something else with <>
+    ("A line with an example where you should put <your own input>", [], False, "", "A line with an example where you should put <your own input>", []),
+    # info/tips/warnings
+    ("!!! warning", [], False, "", " warning", []),
+    # collapsable admonitions
+    ("??? note", [], False, "", " note", []),
+    # Markdown syntax 1 (not in code block)
+    ("`Line` **with** ++a++ _lot_ *of* _++markdown++_ `syntax`", [], False, "", "Line with a lot of markdown syntax", []),
+    # Markdown syntax 2 (in code block)
+    ("`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", [], True, "", "`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", [])
+])
+def test_replace_markdown_markers(input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist):
+    assert replace_markdown_markers(input_line, input_linklist, in_code_block, main_title) == (expected_line, expected_linklist)
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py
new file mode 100644
index 00000000000..6c30fef7985
--- /dev/null
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py
@@ -0,0 +1,15 @@
+import pytest
+import os
+from chatbot_parser import write_metadata
+
+
+@pytest.mark.parametrize("main_title,subtitle,links,title_level,directory,source_file,output", [
+    ("", "", [], 1, "", "", {"source_file": "", "main_title": "", "subtitle": "", "title_depth": 1, "directory": "", "parent_title": ""}),
+    ("A_very_good_main_title", "An_extremely_good_subtitle", ["the_first.link", "the_second.link"], 2,
+     os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), "source",
+     {"source_file": "source", "main_title": "A_very_good_main_title", "subtitle": "An_extremely_good_subtitle", "title_depth": 2,
+      "directory": os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"),
+      "parent_title": "An_awesome_parent_file", "links": {"0": "the_first.link", "1": "the_second.link"}})
+])
+def test_write_metadata(main_title, subtitle, links, title_level, directory, source_file, output):
+    assert write_metadata(main_title, subtitle, links, title_level, directory, source_file) == output

From d96aeba8a42caea9ec76b5d4a574dade095fff84 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 30 Aug 2024 15:14:34 +0200
Subject: [PATCH 146/152] change filenames

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 24e0b287a0a..152c3b45988 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -536,9 +536,9 @@ def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_pa
 
                     # create a title for the previous paragraph
                     if current_paragraph_number == -1:
-                        paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number)
+                        paragraph_title = main_title + _PARAGRAPH_ + f"{paragraph_number:03}"
                     else:
-                        paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number)
+                        paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + f"{current_paragraph_number:03}.{paragraph_number:03}"
                     paragraph_number += 1
 
                     # write text of previous file
@@ -588,9 +588,9 @@ def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_pa
 
     # create a title for the last paragraph
     if current_paragraph_number == -1:
-        paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number)
+        paragraph_title = main_title + _PARAGRAPH_ + f"{paragraph_number:03}"
     else:
-        paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number)
+        paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + f"{current_paragraph_number:03}.{paragraph_number:03}"
 
     # write dictionaries for the last file
     if previous_contained_if:

From bec9a63b71bb71d566b6d32c5a8a827e88eb098b Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 30 Aug 2024 15:33:09 +0200
Subject: [PATCH 147/152] Updated all tests to incorporate the latest changes

---
 .../chatbot_parser.py                         |  2 +-
 ...paragraph_1.txt => tps1_paragraph_001.txt} |  0
 ....json => tps1_paragraph_001_metadata.json} |  2 +-
 ...paragraph_3.txt => tps1_paragraph_003.txt} |  0
 ....json => tps1_paragraph_003_metadata.json} |  2 +-
 ...1.txt => tps1_linux_paragraph_002.001.txt} |  0
 ...ps1_linux_paragraph_002.001_metadata.json} |  4 +--
 ...2.txt => tps1_linux_paragraph_002.002.txt} |  0
 ...ps1_linux_paragraph_002.002_metadata.json} |  4 +--
 ...1.txt => tps1_macos_paragraph_002.001.txt} |  0
 ...ps1_macos_paragraph_002.001_metadata.json} |  4 +--
 ...2.txt => tps1_macos_paragraph_002.002.txt} |  0
 ...ps1_macos_paragraph_002.002_metadata.json} |  4 +--
 ...txt => tps1_windows_paragraph_002.001.txt} |  0
 ...1_windows_paragraph_002.001_metadata.json} |  4 +--
 ...txt => tps1_windows_paragraph_002.002.txt} |  0
 ...1_windows_paragraph_002.002_metadata.json} |  4 +--
 .../tests/test_full_script.py                 |  2 +-
 .../tests/test_if_mangler.py                  |  2 +-
 .../tests/test_links.py                       |  2 ++
 .../tests/test_lists.py                       | 27 ++++++++++++++++---
 .../tests/test_replace_markdown_markers.py    | 16 +++++------
 22 files changed, 50 insertions(+), 29 deletions(-)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/{tps1_paragraph_1.txt => tps1_paragraph_001.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/{tps1_paragraph_1_metadata.json => tps1_paragraph_001_metadata.json} (90%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/{tps1_paragraph_3.txt => tps1_paragraph_003.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/{tps1_paragraph_3_metadata.json => tps1_paragraph_003_metadata.json} (86%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/{tps1_linux_paragraph_2.1.txt => tps1_linux_paragraph_002.001.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/{tps1_linux_paragraph_2.1_metadata.json => tps1_linux_paragraph_002.001_metadata.json} (80%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/{tps1_linux_paragraph_2.2.txt => tps1_linux_paragraph_002.002.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/{tps1_linux_paragraph_2.2_metadata.json => tps1_linux_paragraph_002.002_metadata.json} (76%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/{tps1_macos_paragraph_2.1.txt => tps1_macos_paragraph_002.001.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/{tps1_macos_paragraph_2.1_metadata.json => tps1_macos_paragraph_002.001_metadata.json} (80%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/{tps1_macos_paragraph_2.2.txt => tps1_macos_paragraph_002.002.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/{tps1_macos_paragraph_2.2_metadata.json => tps1_macos_paragraph_002.002_metadata.json} (76%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/{tps1_windows_paragraph_2.1.txt => tps1_windows_paragraph_002.001.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/{tps1_windows_paragraph_2.1_metadata.json => tps1_windows_paragraph_002.001_metadata.json} (79%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/{tps1_windows_paragraph_2.2.txt => tps1_windows_paragraph_002.002.txt} (100%)
 rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/{tps1_windows_paragraph_2.2_metadata.json => tps1_windows_paragraph_002.002_metadata.json} (77%)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index 152c3b45988..ee45d5455d8 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -165,7 +165,7 @@ def make_valid_link(link, main_title, is_linux_tutorial):
             link = link.replace('../', '')
 
         if link.startswith("#"):
-            link = DOCS_URL + '/' + linux_part + main_title + "/" + link
+            link = DOCS_URL + '/' + linux_part + main_title.replace(".md", "") + "/" + link
         elif link.endswith(".md") and ("/" not in link or "." not in link.split("/")[0]):
             link = DOCS_URL + '/' + linux_part + link.replace(".md", "")
         elif '.md#' in link:
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_001.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_001.txt
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_001_metadata.json
similarity index 90%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_001_metadata.json
index 08c0b4e4973..31cbf626d8d 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_001_metadata.json
@@ -9,7 +9,7 @@
     },
     "parent_title": "",
     "previous_title": null,
-    "next_title": "tps1_paragraph_2",
+    "next_title": "tps1_paragraph_002",
     "OS": "generic",
     "reference_link": "https://docs.hpc.ugent.be/tps1/#main-title"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_003.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_003.txt
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_003_metadata.json
similarity index 86%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_003_metadata.json
index 2f1ea4dcd1f..cc7b47a8b5a 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_003_metadata.json
@@ -5,7 +5,7 @@
     "title_depth": 2,
     "directory": "tps1",
     "parent_title": "",
-    "previous_title": "tps1_paragraph_2",
+    "previous_title": "tps1_paragraph_002",
     "next_title": null,
     "OS": "generic",
     "reference_link": "https://docs.hpc.ugent.be/tps1/#conclusion"
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.001.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.001.txt
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.001_metadata.json
similarity index 80%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.001_metadata.json
index 208cb3472f4..fb165c8e7fc 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.001_metadata.json
@@ -8,8 +8,8 @@
     "links": {
         "0": "https://docs.hpc.ugent.be/linuxmacos"
     },
-    "previous_title": "tps1_paragraph_1",
-    "next_title": "tps1_linux_paragraph_2.2",
+    "previous_title": "tps1_paragraph_001",
+    "next_title": "tps1_linux_paragraph_002.002",
     "OS": "linux",
     "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#os-specific-sections"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.002.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.002.txt
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.002_metadata.json
similarity index 76%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.002_metadata.json
index b975dfe4e03..36cda85cfcc 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.002_metadata.json
@@ -5,8 +5,8 @@
     "title_depth": 3,
     "directory": "tps1",
     "parent_title": "OS-specific-sections",
-    "previous_title": "tps1_linux_paragraph_2.1",
-    "next_title": "tps1_paragraph_3",
+    "previous_title": "tps1_linux_paragraph_002.001",
+    "next_title": "tps1_paragraph_003",
     "OS": "linux",
     "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#non-windows-section"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.001.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.001.txt
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.001_metadata.json
similarity index 80%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.001_metadata.json
index 9c605eb9004..2de51c7c0e1 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.001_metadata.json
@@ -8,8 +8,8 @@
     "links": {
         "0": "https://docs.hpc.ugent.be/linuxmacos"
     },
-    "previous_title": "tps1_paragraph_1",
-    "next_title": "tps1_macos_paragraph_2.2",
+    "previous_title": "tps1_paragraph_001",
+    "next_title": "tps1_macos_paragraph_002.002",
     "OS": "macos",
     "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#os-specific-sections"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.002.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.002.txt
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.002_metadata.json
similarity index 76%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.002_metadata.json
index e3ca81d7cc5..fb48000e679 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.002_metadata.json
@@ -5,8 +5,8 @@
     "title_depth": 3,
     "directory": "tps1",
     "parent_title": "OS-specific-sections",
-    "previous_title": "tps1_macos_paragraph_2.1",
-    "next_title": "tps1_paragraph_3",
+    "previous_title": "tps1_macos_paragraph_002.001",
+    "next_title": "tps1_paragraph_003",
     "OS": "macos",
     "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#non-windows-section"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.001.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.001.txt
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.001_metadata.json
similarity index 79%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.001_metadata.json
index ab58c622b8c..00b7fcee452 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.001_metadata.json
@@ -8,8 +8,8 @@
     "links": {
         "0": "https://docs.hpc.ugent.be/windows"
     },
-    "previous_title": "tps1_paragraph_1",
-    "next_title": "tps1_windows_paragraph_2.2",
+    "previous_title": "tps1_paragraph_001",
+    "next_title": "tps1_windows_paragraph_002.002",
     "OS": "windows",
     "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#os-specific-sections"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.002.txt
similarity index 100%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.002.txt
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.002_metadata.json
similarity index 77%
rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json
rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.002_metadata.json
index 435c9e9c484..0e38a476d04 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.002_metadata.json
@@ -5,8 +5,8 @@
     "title_depth": 3,
     "directory": "tps1",
     "parent_title": "OS-specific-sections",
-    "previous_title": "tps1_windows_paragraph_2.1",
-    "next_title": "tps1_paragraph_3",
+    "previous_title": "tps1_windows_paragraph_002.001",
+    "next_title": "tps1_paragraph_003",
     "OS": "windows",
     "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#windows-specific-section"
 }
\ No newline at end of file
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py
index 91605dec651..99baf41ebc0 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py
@@ -11,7 +11,7 @@
       "DESTINATION_DIRECTORY": "tests/test_files/ftps/actual",
       "SPLIT_ON_TITLES": False,
       "SPLIT_ON_PARAGRAPHS": True,
-      "MIN_PARAGRAPH_LENGTH": 160,
+      "MIN_PARAGRAPH_LENGTH": 50,
       "MAX_TITLE_DEPTH": 4,
       "INCLUDE_LINKS_IN_PLAINTEXT": False,
       "DEEP_DIRECTORIES": False,
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
index 4d0dd876103..c2ae9fea19e 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py
@@ -21,7 +21,7 @@ def test_if_mangler(input_file, output_file):
     input_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", input_file)
     expected_output_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", output_file)
     actual_output_file_path = os.path.join("if_mangled_files", input_file)
-    mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.'})
+    mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.', "VERBOSE": False})
 
     # check every line
     with open(expected_output_file_path, "r") as expected_read_file:
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_links.py b/scripts/HPC_chatbot_preprocessor/tests/test_links.py
index d1acca1d740..a13675dd3ad 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_links.py
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_links.py
@@ -4,6 +4,8 @@
 from chatbot_parser import main
 import json
 
+#################################################### IMPORTANT: This test still fails because there are some invalid links in the documentation ####################################################
+
 whitelist = ["mailto:hpc@ugent.be"]
 slow_list = ["https://login.hpc.ugent.be", "https://www.edx.org/course/introduction-linux-linuxfoundationx-lfs101x-0"]
 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py
index 06e56a5cb2c..d8a3d630c4c 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py
@@ -17,10 +17,29 @@
          "VERBOSE": False
      },
      False,
-     {
-         'list_test.md_paragraph_1': "Title\nSome explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.\n1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list.\n2. Second entry\n3. Third entry\n4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.\n",
-         'list_test.md_paragraph_2': 'And now the text continues like normal in a new section.'
-     }
+     {'list_test.md_paragraph_001': 'Title\n'
+                                    'Some explanation about the following list that '
+                                    'is quite long. This could be problematic since '
+                                    'this could mean that the explanation of the '
+                                    'content of the list would be part of a '
+                                    'different paragraph than the list.\n'
+                                    '1. First entry that is very verbose since we '
+                                    'want to hit the character limit for a '
+                                    "paragraph to make sure a list can't be split "
+                                    'in the middle. If this entry is long enough, '
+                                    'the character limit should make it so that any '
+                                    'of the following newlines can be the start of '
+                                    "a new section if the splitter doesn't know it "
+                                    'is in a list.\n'
+                                    '2. Second entry\n'
+                                    '3. Third entry\n'
+                                    '4. Fourth entry that is very verbose, so we '
+                                    'hit the character limit for a section split, '
+                                    "even though it shouldn't be necessary since "
+                                    'the explanation of the list is already well '
+                                    'above the character limit.\n',
+      'list_test.md_paragraph_002': 'And now the text continues like normal in a '
+                                    'new section.'}
      )
 ])
 def test_links(file, main_title, options, is_linux_tutorial, expected_text):
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py
index f4cee6dd75c..d9150290f34 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py
@@ -10,14 +10,14 @@
     # image 2
     ("![](img/Look-at-this-photograph.png)", [], False, "", "", []),
     # link 1 (outside docs)
-    ("A line with a [link](a-nice-link.com)", ["another-link.be"], False, "",
-     "A line with a link§link§link§1§link§link§", ["another-link.be", "a-nice-link.com"]),
+    ("A line with a [link](https://a-nice-link.com)", ["https://another-link.be"], False, "",
+     "A line with a link§link§link§1§link§link§", ["https://another-link.be", "https://a-nice-link.com"]),
     # link 2 (another document within the docs)
-    ("A line with a [link to the docs](account.md#welcome-e-mail)", ["another-link.be"], False, "",
-     "A line with a link to the docs§link§link§1§link§link§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]),
+    ("A line with a [link to the docs](account.md#welcome-e-mail)", ["https://another-link.be"], False, "",
+     "A line with a link to the docs§link§link§1§link§link§", ["https://another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]),
     # link 3 (the same document)
-    ("A line with a [link to the same doc](#welcome-e-mail)", ["another-link.be"], False, "account.md",
-     "A line with a link to the same doc§link§link§1§link§link§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]),
+    ("A line with a [link to the same doc](#welcome-e-mail)", ["https://another-link.be"], False, "account.md",
+     "A line with a link to the same doc§link§link§1§link§link§", ["https://another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]),
     # codeblock
     ("```shell", [], True, "", "", []),
     # html syntax 1 (normal syntax)
@@ -28,7 +28,7 @@
     # html syntax 3 (style)
     ("<p style='text-align: center'>A line with style</p>", [], False, "", "A line with style", []),
     # Bot comment
-    ("<!--INPUT_FOR_BOTSomething about the following table-->", [], False, "", "Something about the following table", []),
+    ("<!--INPUT_FOR_BOT: Something about the following table-->", [], False, "", "Something about the following table", []),
     # non-Bot comment
     ("<!--Something else about the following table-->", [], False, "", "", []),
     # something else with <>
@@ -43,4 +43,4 @@
     ("`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", [], True, "", "`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", [])
 ])
 def test_replace_markdown_markers(input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist):
-    assert replace_markdown_markers(input_line, input_linklist, in_code_block, main_title) == (expected_line, expected_linklist)
+    assert replace_markdown_markers(input_line, input_linklist, in_code_block, main_title, False) == (expected_line, expected_linklist)

From 886da19db4cd8b6469fe04fb604f1e708035d265 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 30 Aug 2024 15:50:25 +0200
Subject: [PATCH 148/152] change working directory of test lists

---
 scripts/HPC_chatbot_preprocessor/tests/test_lists.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py
index d8a3d630c4c..56ac3348dfa 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py
@@ -3,7 +3,7 @@
 
 
 @pytest.mark.parametrize("file, main_title, options, is_linux_tutorial, expected_text", [
-    ("./test_files/list_file/list_test.md",
+    ("./tests/test_files/list_file/list_test.md",
      "list_test.md",
      {
          "SOURCE_DIRECTORY": "./test_files/list_file",

From b354bb2c363de592a4cdbcfeb82dfa9f419de32d Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 30 Aug 2024 16:19:45 +0200
Subject: [PATCH 149/152] change directory in metadata to linux version

---
 .../generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json | 2 +-
 .../tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json     | 2 +-
 .../tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json     | 2 +-
 .../Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json    | 2 +-
 .../tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json     | 2 +-
 .../Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json    | 2 +-
 .../tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json     | 2 +-
 .../Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json        | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json
index b7786c066a7..e481468cefe 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json
@@ -3,7 +3,7 @@
     "subtitle": "Subtitle-1",
     "source_file": "tests/test_files/ftts/tts1.md",
     "title_depth": 2,
-    "directory": "tts1\\Main-title\\Subtitle-1",
+    "directory": "tts1/Main-title/Subtitle-1",
     "parent_title": "Main-title",
     "previous_title": "Main-title",
     "next_title": "Subtitle-2-g",
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json
index eb5403804e2..100766dd865 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json
@@ -3,7 +3,7 @@
     "subtitle": "Subtitle-5-g",
     "source_file": "tests/test_files/ftts/tts1.md",
     "title_depth": 2,
-    "directory": "tts1\\Main-title\\Subtitle-5-g",
+    "directory": "tts1/Main-title/Subtitle-5-g",
     "parent_title": "Main-title",
     "previous_title": "Subtitle-2-g",
     "next_title": null,
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
index f7330bec86d..6f42345d013 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
@@ -3,7 +3,7 @@
     "subtitle": "Subtitle-2-g",
     "source_file": "tests/test_files/ftts/tts1.md",
     "title_depth": 2,
-    "directory": "tts1\\Main-title\\Subtitle-2-g",
+    "directory": "tts1/Main-title/Subtitle-2-g",
     "parent_title": "Main-title",
     "links": {
         "0": "https://docs.hpc.ugent.be/linuxmacos"
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
index a76f852c874..351b6f5cca6 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
@@ -3,7 +3,7 @@
     "subtitle": "Subtitle-4-l&m",
     "source_file": "tests/test_files/ftts/tts1.md",
     "title_depth": 3,
-    "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m",
+    "directory": "tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m",
     "parent_title": "Subtitle-2-g",
     "links": {
         "0": "https://docs.hpc.ugent.be/generic"
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
index 8b234c92fa6..30249d3d155 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
@@ -3,7 +3,7 @@
     "subtitle": "Subtitle-2-g",
     "source_file": "tests/test_files/ftts/tts1.md",
     "title_depth": 2,
-    "directory": "tts1\\Main-title\\Subtitle-2-g",
+    "directory": "tts1/Main-title/Subtitle-2-g",
     "parent_title": "Main-title",
     "links": {
         "0": "https://docs.hpc.ugent.be/linuxmacos"
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
index 732d309da81..087fe810609 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json
@@ -3,7 +3,7 @@
     "subtitle": "Subtitle-4-l&m",
     "source_file": "tests/test_files/ftts/tts1.md",
     "title_depth": 3,
-    "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m",
+    "directory": "tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m",
     "parent_title": "Subtitle-2-g",
     "links": {
         "0": "https://docs.hpc.ugent.be/generic"
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
index 7a43426a85f..da3c61d3edc 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json
@@ -3,7 +3,7 @@
     "subtitle": "Subtitle-2-g",
     "source_file": "tests/test_files/ftts/tts1.md",
     "title_depth": 2,
-    "directory": "tts1\\Main-title\\Subtitle-2-g",
+    "directory": "tts1/Main-title/Subtitle-2-g",
     "parent_title": "Main-title",
     "links": {
         "0": "https://docs.hpc.ugent.be/windows"
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json
index 4d7f494320d..e07586cf55e 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json
@@ -3,7 +3,7 @@
     "subtitle": "Subtitle-3-w",
     "source_file": "tests/test_files/ftts/tts1.md",
     "title_depth": 3,
-    "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-3-w",
+    "directory": "tts1/Main-title/Subtitle-2-g/Subtitle-3-w",
     "parent_title": "Subtitle-2-g",
     "links": {
         "0": "https://docs.hpc.ugent.be/generic"

From 87e557590c8c5c5e10af46035da41d7f422e5c29 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 30 Aug 2024 16:19:55 +0200
Subject: [PATCH 150/152] store directory in metadata as POSIX-style (forward-slash) path

---
 scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
index ee45d5455d8..9b6e8dc7f2d 100644
--- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
+++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py
@@ -634,7 +634,7 @@ def write_metadata(main_title, subtitle, links, title_level, directory, source_f
     :return paragraph_metadata: dictionary containing the metadata about the section
     """
 
-    paragraph_metadata = {MAIN_TITLE: main_title, SUBTITLE: subtitle, SOURCE_FILE: source_file, TITLE_DEPTH: title_level, DIRECTORY: directory}
+    paragraph_metadata = {MAIN_TITLE: main_title, SUBTITLE: subtitle, SOURCE_FILE: source_file, TITLE_DEPTH: title_level, DIRECTORY: Path(directory).as_posix()}
 
     if len(links) > 0:
         paragraph_metadata[LINKS] = {}
@@ -1061,6 +1061,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or
                             os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE])
                         else:
                             os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE])
+                        os_specific_metadata[os_subtitle][DIRECTORY] = Path(os_specific_metadata[os_subtitle][DIRECTORY]).as_posix()
 
                     # make a directory to save the files
                     filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY])

From 8f39e08aab216886bc89d495f631510448e4ce31 Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 30 Aug 2024 16:21:48 +0200
Subject: [PATCH 151/152] add pytest to requirements

---
 scripts/HPC_chatbot_preprocessor/requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt
index 37137582aad..1b9cb4a5052 100644
--- a/scripts/HPC_chatbot_preprocessor/requirements.txt
+++ b/scripts/HPC_chatbot_preprocessor/requirements.txt
@@ -1,4 +1,5 @@
 PyYAML==6.0.2
 Jinja2==3.1.4
 tiktoken~=0.7.0
-pathlib~=1.0.1
\ No newline at end of file
+pathlib~=1.0.1
+pytest
\ No newline at end of file

From d6c33e75c59f46745795418da20bc242328ec77d Mon Sep 17 00:00:00 2001
From: EwDa291 <ewout.danneels@ugent.be>
Date: Fri, 30 Aug 2024 16:33:04 +0200
Subject: [PATCH 152/152] update write_metadata test to expect POSIX-style directory

---
 .../HPC_chatbot_preprocessor/tests/test_write_metadata.py  | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py
index 6c30fef7985..cb80d00975c 100644
--- a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py
+++ b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py
@@ -1,14 +1,15 @@
 import pytest
 import os
 from chatbot_parser import write_metadata
+from pathlib import Path
 
 
 @pytest.mark.parametrize("main_title,subtitle,links,title_level,directory,source_file,output", [
-    ("", "", [], 1, "", "", {"source_file": "", "main_title": "", "subtitle": "", "title_depth": 1, "directory": "", "parent_title": ""}),
+    ("", "", [], 1, "", "", {"source_file": "", "main_title": "", "subtitle": "", "title_depth": 1, "directory": ".", "parent_title": ""}),
     ("A_very_good_main_title", "An_extremely_good_subtitle", ["the_first.link", "the_second.link"], 2,
-     os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), "source",
+     Path(os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle")).as_posix(), "source",
      {"source_file": "source", "main_title": "A_very_good_main_title", "subtitle": "An_extremely_good_subtitle", "title_depth": 2,
-      "directory": os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"),
+      "directory": Path(os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle")).as_posix(),
       "parent_title": "An_awesome_parent_file", "links": {"0": "the_first.link", "1": "the_second.link"}})
 ])
 def test_write_metadata(main_title, subtitle, links, title_level, directory, source_file, output):