From 1ebc3636346a01f6c5709099d34c57f2cb00d7ea Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 8 Aug 2024 11:27:54 +0200 Subject: [PATCH 001/152] initial commit --- .../HPC chatbot preprocessor/.idea/.gitignore | 8 + .../.idea/HPC chatbot preprocessor.iml | 10 + .../inspectionProfiles/Project_Default.xml | 25 ++ .../inspectionProfiles/profiles_settings.xml | 6 + .../HPC chatbot preprocessor/.idea/misc.xml | 7 + .../.idea/modules.xml | 8 + .../HPC chatbot preprocessor/.idea/vcs.xml | 6 + scripts/HPC chatbot preprocessor/main.py | 375 ++++++++++++++++++ .../HPC chatbot preprocessor/start_checker.py | 17 + 9 files changed, 462 insertions(+) create mode 100644 scripts/HPC chatbot preprocessor/.idea/.gitignore create mode 100644 scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml create mode 100644 scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml create mode 100644 scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml create mode 100644 scripts/HPC chatbot preprocessor/.idea/misc.xml create mode 100644 scripts/HPC chatbot preprocessor/.idea/modules.xml create mode 100644 scripts/HPC chatbot preprocessor/.idea/vcs.xml create mode 100644 scripts/HPC chatbot preprocessor/main.py create mode 100644 scripts/HPC chatbot preprocessor/start_checker.py diff --git a/scripts/HPC chatbot preprocessor/.idea/.gitignore b/scripts/HPC chatbot preprocessor/.idea/.gitignore new file mode 100644 index 00000000000..13566b81b01 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml b/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml new file mode 100644 index 00000000000..2c80e126949 --- /dev/null +++ 
b/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 00000000000..fc946d9cefc --- /dev/null +++ b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,25 @@ + + + + \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 00000000000..105ce2da2d6 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/misc.xml b/scripts/HPC chatbot preprocessor/.idea/misc.xml new file mode 100644 index 00000000000..54cda8fd6dd --- /dev/null +++ b/scripts/HPC chatbot preprocessor/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/modules.xml b/scripts/HPC chatbot preprocessor/.idea/modules.xml new file mode 100644 index 00000000000..58e027d745f --- /dev/null +++ b/scripts/HPC chatbot preprocessor/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/vcs.xml b/scripts/HPC chatbot preprocessor/.idea/vcs.xml new file mode 100644 index 00000000000..b2bdec2d71b --- /dev/null +++ b/scripts/HPC chatbot preprocessor/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py new file mode 100644 index 00000000000..7f74fb12fb2 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/main.py @@ -0,0 +1,375 @@ +import os 
+import re +import shutil + +# test_number = int(input("Which test should be run?")) +# +# # Test for strip_markdown (somewhat successful, see findings file) +# +# if test_number == 1: +# import strip_markdown +# +# strip_markdown.strip_markdown_file("C:\\HPC werk\\Chatbot\\md_to_plaintext_test.md") +# +# # Test if copy of document doesn't change original document (successful) +# if test_number == 2: +# import shutil +# +# shutil.copyfile("C:\\HPC_werk\\Chatbot\\md_to_plaintext_test.txt", +# "C:\\HPC_werk\\Chatbot\\md_to_plaintext_test_copy.txt") +# with open("C:\\HPC_werk\\Chatbot\\md_to_plaintext_test_copy.txt", 'w') as file: +# file.write('hello') + +# Test with actual document + +# make a copy of one of the md files to test some things +shutil.copyfile("C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC\\getting_started.md", + "C:\\HPC_werk\\Chatbot\\getting_started_copy.md") + +################### define global variables ################### +# variable for the filename (which will be changed into something else in the final version) +filename = "getting_started_copy.md" + +# variable for the main title (needed for reference links) +main_title = filename[:-3] + +# variable that keeps track of the directories that are used to write in at different levels +root_dir_generic = "C:\\HPC_werk\\Chatbot\\parsed_mds\\generic\\" +root_dir_os_specific_linux = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\linux\\" +root_dir_os_specific_windows = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\windows\\" +root_dir_os_specific_macos = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\macos\\" +curr_dirs = [filename[:-3] for i in range(4)] + +# variable to keep track whether we're dealing with OS-specific info or not +OS_specific = False + +# pattern for the regex if-statement to filter out markdown titles +if_pattern = r'^#+ ' + +# variable that keeps track of the latest non-zero level title and corresponding directory +last_title_level = 1 +last_title = 
None +last_directory = None +last_was_title = False + +# list to keep track of links in the text +links_generic = [] +links_linux = [] +links_windows = [] +links_macos = [] + +# dictionaries to keep track of current OS and location +active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} +active_site_if_states = {"Gent": "inactive", "not-Gent": "inactive"} + +# variable to keep track of the type of if-statement +if_type = "OS" + +# variable to keep track of the macro-replacements at the top of markdown files +replacements = {} + +# variable that is used to detect whether the first title has been encountered yet +after_first_title = False + + +################### define functions ################### + +# function that removes the previous file structure before starting the process of making a new one +def remove_directory_tree(old_directory): + if os.path.exists(old_directory): + shutil.rmtree(old_directory) + + +# function that checks the first lines of a file until a title is found and saves the macro-replacements to the list +def save_replacements(curr_line): + global replacements + match = re.search(r'\{% set (.*?)="(.*?)" %}', curr_line) + replacements[match.group(1)] = match.group(2) + + +# function that checks whether the current line has a title of level 3 at maximum (returns the level of the title or 0 if the line is not a title) +def check_for_title_logic(curr_line): + global curr_dirs + match = re.match(if_pattern, curr_line) + if match and len(match.group(0)) <= 4: + return len(match.group(0)) - 1 + else: + return 0 + + +# function that resets the contents of the link_lists +def reset_link_lists(): + global links_generic, links_linux, links_windows, links_macos + links_generic = [] + links_linux = [] + links_windows = [] + links_macos = [] + + +# function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables +def check_for_title(curr_line): + global curr_dirs, 
last_title + logic_output = check_for_title_logic(curr_line) + if logic_output == 0: + return 0, None, None + else: + if last_title is not None: + write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) + write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux) + write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", links_windows) + write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos) + reset_link_lists() + + curr_dirs[logic_output] = curr_dirs[logic_output - 1] + "\\" + curr_line[logic_output + 1:-1].replace(' ', '-') + + create_directory(root_dir_generic + curr_dirs[logic_output]) + create_directory(root_dir_os_specific_linux + curr_dirs[logic_output]) + create_directory(root_dir_os_specific_windows + curr_dirs[logic_output]) + create_directory(root_dir_os_specific_macos + curr_dirs[logic_output]) + + update_lower_curr_dir(curr_dirs[logic_output], logic_output) + return logic_output, curr_line[logic_output + 1:-1].replace(' ', '-'), curr_dirs[logic_output] + + +# function that creates directories if needed +def create_directory(new_directory): + if not os.path.exists(new_directory): + os.mkdir(new_directory) + + +# function that updates the curr_dir variables when needed +def update_lower_curr_dir(curr_directory, level): + global curr_dirs + for i in range(level + 1, 4): + curr_dirs[i] = curr_directory + + +# function that replaces certain markdown structures with the equivalent used on the website +def replace_markdown_markers(curr_line, linklist): + + # replace {{hpcinfra}} + curr_line = re.sub(r'\{\{\s*hpcinfra\s*}}', "HPC-UGent infrastructure", curr_line) + + # replace other replacement macros + for macro in replacements.keys(): + curr_line = re.sub(r'\{\{\s*' + re.escape(macro) + r'\s*}}', replacements[macro], curr_line) + + # replace links with a 
reference + matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) + if matches: + for match in matches: + print(f"[{match[0]}]({match[1]})") + curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + str(len(linklist) + 1) + "]") + linklist.append(match[1]) + + return curr_line, linklist + + +# function that checks for if-statements +def check_if_statements(curr_line): + global if_type + + # check whether the first part of the line contains information wrt if-statements + match = re.search(r'^\{%-\s([^%]*)%}(.*)', curr_line) + + # check whether the line contains information wrt if-statements that is not in its first part + match_large = re.search(r'^(.*)(\{%-\s[^%]*%})(.*)', curr_line) + + if match: + print("################################################################################") + content = match.group(1) + print(content) + + # new if-statement wrt OS + if re.match(r'if OS == ', content): + OS = content[9:-1] + + # set new active OS + active_OS_if_states[OS] = "active" + + # set other active ones on inactive + for other_OS in active_OS_if_states.keys(): + if other_OS != OS and active_OS_if_states[other_OS] == "active": + active_OS_if_states[other_OS] = "inactive" + + if_type = "OS" + + # new if-statement wrt site + elif re.match(r'if site == ', content): + if re.search(r'(?i)gent', content): + active_site_if_states["Gent"] = "active" + active_site_if_states["not-Gent"] = "inactive" + else: + active_site_if_states["not-Gent"] = "active" + if active_site_if_states["Gent"] == "active": + active_site_if_states["Gent"] = "inactive" + if_type = "site" + + # endif statement wrt OS + elif re.match(r'endif ', content) and if_type == "OS": + if str(1) in active_OS_if_states.values(): + active_OS_if_states[list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active" + else: + for key in active_OS_if_states.keys(): + active_OS_if_states[key] = "inactive" + + # endif statement wrt site + elif re.match(r'endif ', 
content) and if_type == "site": + for key in active_site_if_states.keys(): + active_site_if_states[key] = "inactive" + + # else statement wrt OS + elif re.match(r'else ', content) and if_type == "OS": + + i = 0 + for i in range(3): + if str(i) not in active_OS_if_states.values(): + break + + # set the previously active one on inactive until the next endif + key_list = list(active_OS_if_states.keys()) + position = list(active_OS_if_states.values()).index("active") + active_OS_if_states[key_list[position]] = str(i) + + # set inactive ones on active + while "inactive" in active_OS_if_states.values(): + position = list(active_OS_if_states.values()).index("inactive") + active_OS_if_states[key_list[position]] = "active" + + # else statement wrt site + elif re.match(r'else ', content) and if_type == "site": + + # change state of "Gent" and set not-Gent on active + if active_site_if_states["Gent"] == "inactive": + active_site_if_states["Gent"] = "active" + elif active_site_if_states["Gent"] == "active": + active_site_if_states["Gent"] = str(0) + active_site_if_states["not-Gent"] = "active" + + print(active_OS_if_states) + print(active_site_if_states) + + if len(match.group(2)) != 0: + extra_message = match.group(2).lstrip() + print(extra_message) + # check_if_statements(extra_message) + print("check_extra_message") + return "check_extra_message", extra_message, None + + else: + print("done") + return "done", None, None + + elif match_large: + print("################################################################################") + print(active_OS_if_states) + print(active_site_if_states) + print(match_large.group(1)) + print(match_large.group(2)) + print("write_text_and_check_extra_message") + return "write_text_and_check_extra_message", match_large.group(2), match_large.group(1) + + else: + return "write_text", None, curr_line + + +# function that writes a line to a file +def write_text_to_file(file_name, curr_line): + global links_generic, links_linux, links_windows, 
links_macos + with open(file_name, "a") as write_file: + if "generic" in file_name: + curr_line, links_generic = replace_markdown_markers(curr_line, links_generic) + elif "linux" in file_name: + curr_line, links_linux = replace_markdown_markers(curr_line, links_linux) + elif "windows" in file_name: + curr_line, links_windows = replace_markdown_markers(curr_line, links_windows) + else: + curr_line, links_macos = replace_markdown_markers(curr_line, links_macos) + write_file.write(curr_line) + + +# function that decides what file to write text to +def choose_and_write_to_file(curr_line): + # check that the line is part of the website for gent + if active_site_if_states["Gent"] == "active" or active_site_if_states["Gent"] == "inactive" and active_site_if_states["not-Gent"] == "inactive": + if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive": + write_text_to_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", curr_line) + if active_OS_if_states["linux"] == "active": + write_text_to_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", curr_line) + if active_OS_if_states["windows"] == "active": + write_text_to_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", curr_line) + if active_OS_if_states["macos"] == "active": + write_text_to_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", curr_line) + + +# function that adds a reference link at the end of every txt file +def add_reference_link(file_location, reference_link): + with open(file_location, 'a') as write_file: + write_file.write("\nreference: " + reference_link + "\n") + + +# function that adds the links that should be at the end of a file +def write_end_of_file(file_location, OS, linklist): + if len(OS) > 0: + OS = OS + "/" + + # add the links from within the document + with open(file_location, 'a') as write_file: + for i, link 
in enumerate(linklist): + write_file.write("[" + str(i + 1) + "]: " + str(link) + "\n") + + # finally add the reference link + add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + main_title + "/#" + last_title.lower()) + + +################### actually parse the md file ################### + +# remove the old directories if needed +remove_directory_tree(root_dir_generic) +remove_directory_tree(root_dir_os_specific_linux) +remove_directory_tree(root_dir_os_specific_windows) +remove_directory_tree(root_dir_os_specific_macos) + +# create directories for the source markdown file +create_directory(root_dir_generic) +create_directory(root_dir_os_specific_linux) +create_directory(root_dir_os_specific_windows) +create_directory(root_dir_os_specific_macos) +create_directory(root_dir_generic + curr_dirs[0]) +create_directory(root_dir_os_specific_linux + curr_dirs[0]) +create_directory(root_dir_os_specific_windows + curr_dirs[0]) +create_directory(root_dir_os_specific_macos + curr_dirs[0]) + +# open the file and store line by line in the right file +with open("C:\\HPC_werk\\Chatbot\\getting_started_copy.md", 'r') as readfile: + + for line in readfile: + title_level, title, directory = check_for_title(line) + + # line is a title with a maximum depth of 3 + if title_level > 0: + last_title_level = title_level + last_title = title + last_directory = directory + after_first_title = True + + # line is not a title + else: + if after_first_title: + # check for if-statements and write the appropriate lines in the right files + next_action = check_if_statements(line) + while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": + if next_action[0] == "write_text_and_check_extra_message": + choose_and_write_to_file(next_action[2]) + next_action = check_if_statements(next_action[1]) + + if next_action[0] == "write_text": + choose_and_write_to_file(next_action[2]) + else: + save_replacements(line) + +# write end of file for the last 
file +write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) +write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux) +write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", links_windows) +write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos) diff --git a/scripts/HPC chatbot preprocessor/start_checker.py b/scripts/HPC chatbot preprocessor/start_checker.py new file mode 100644 index 00000000000..50b61cd5213 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/start_checker.py @@ -0,0 +1,17 @@ +import os + +directory = "C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC" + +for dirpath, dirnames, filenames in os.walk(directory): + for filename in filenames: + # if filename.endswith("xdmod.md"): + # break + if filename.endswith(".md"): + lines_until_title = 0 + with open(directory + "\\" + filename, "r") as file: + for line in file: + if line[0] == "#": + break + lines_until_title += 1 + print(filename + " : " + str(lines_until_title)) + break From 10edb2050da2ffc7412c7123c190cb4991682daf Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 8 Aug 2024 17:09:18 +0200 Subject: [PATCH 002/152] some cleanup --- .../copies/getting_started_copy.md | 268 ++++++++++++++++++ .../Getting-Access/Getting-Access.txt | 25 ++ .../Getting-Connected/Getting-Connected.txt | 19 ++ .../Getting-Started/Getting-Started.txt | 11 + .../Inspect-your-results.txt | 56 ++++ .../Getting-Started/Next-steps/Next-steps.txt | 15 + .../Submitting-a-job/Submitting-a-job.txt | 60 ++++ .../Transfer-your-files.txt | 21 ++ .../Wait-for-job-to-be-executed.txt | 26 ++ .../Getting-Access/Getting-Access.txt | 2 + .../Getting-Connected/Getting-Connected.txt | 18 ++ .../Getting-Started/Getting-Started.txt | 2 + .../Inspect-your-results.txt | 2 + 
.../Getting-Started/Next-steps/Next-steps.txt | 2 + .../Submitting-a-job/Submitting-a-job.txt | 2 + .../Transfer-your-files.txt | 21 ++ .../Wait-for-job-to-be-executed.txt | 2 + .../Getting-Access/Getting-Access.txt | 2 + .../Getting-Connected/Getting-Connected.txt | 13 + .../Getting-Started/Getting-Started.txt | 2 + .../Inspect-your-results.txt | 2 + .../Getting-Started/Next-steps/Next-steps.txt | 2 + .../Submitting-a-job/Submitting-a-job.txt | 2 + .../Transfer-your-files.txt | 21 ++ .../Wait-for-job-to-be-executed.txt | 2 + .../Getting-Access/Getting-Access.txt | 2 + .../Getting-Connected/Getting-Connected.txt | 13 + .../Getting-Started/Getting-Started.txt | 2 + .../Inspect-your-results.txt | 2 + .../Getting-Started/Next-steps/Next-steps.txt | 2 + .../Submitting-a-job/Submitting-a-job.txt | 2 + .../Transfer-your-files.txt | 15 + .../Wait-for-job-to-be-executed.txt | 2 + .../HPC chatbot preprocessor/jinja_parser.py | 3 + scripts/HPC chatbot preprocessor/main.py | 26 +- .../HPC chatbot preprocessor/start_checker.py | 2 + 36 files changed, 662 insertions(+), 7 deletions(-) create mode 100644 scripts/HPC chatbot preprocessor/copies/getting_started_copy.md create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt create mode 100644 scripts/HPC chatbot 
preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt create mode 100644 scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt create mode 100644 scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt create mode 100644 scripts/HPC chatbot preprocessor/jinja_parser.py diff --git a/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md b/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md new file mode 100644 index 00000000000..8fe33ebc513 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md @@ -0,0 +1,268 @@ +{% set exampleloc="mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist" %} +# Getting Started + +Welcome to the "Getting Started" guide. This chapter will lead you through the initial steps of logging into the {{hpcinfra}} and submitting your very first job. We'll also walk you through the process step by step using a practical example. + +In addition to this chapter, you might find the [recording of the *Introduction to HPC-UGent* training session](https://www.ugent.be/hpc/en/training/introhpcugent-recording) to be a useful resource. + +Before proceeding, read [the introduction to HPC](introduction.md) to gain an understanding of the {{ hpcinfra }} and related terminology. + +### Getting Access + +To get access to the {{hpcinfra}}, visit [Getting an HPC Account](account.md). + +If you have not used Linux before, +{%- if site == 'Gent' %} +now would be a good time to follow our [Linux Tutorial](linux-tutorial/index.md). +{%- else %} +please learn some basics first before continuing. (see [Appendix C - Useful Linux Commands](useful_linux_commands.md)) +{%- endif %} + +#### A typical workflow looks like this: + +1. 
Connect to the login nodes +2. Transfer your files to the {{hpcinfra}} +3. Optional: compile your code and test it +4. Create a job script and submit your job +5. Wait for job to be executed +6. Study the results generated by your jobs, either on the cluster or + after downloading them locally. + +We will walk through an illustrative workload to get you started. In this example, our objective is to train a deep learning model for recognizing hand-written digits (MNIST dataset) using [TensorFlow](https://www.tensorflow.org/); +see the [example scripts](https://github.com/hpcugent/vsc_user_docs/tree/main/{{exampleloc}}). + +### Getting Connected + +There are two options to connect + +- Using a terminal to connect via SSH (for power users) (see [First Time connection to the {{ hpcinfra}}](connecting.md#first-time-connection-to-the-hpc-infrastructure)) +- [Using the web portal](web_portal.md) + +Considering your operating system is **{{OS}}**, + +{%- if OS == linux %} +it is recommended to make use of the `ssh` command in a terminal to get the most flexibility. + +Assuming you have already generated SSH keys in the previous step ([Getting Access](#getting-access)), and that they are in a default location, you should now be able to login by running the following command: + +
ssh {{userid}}@{{loginnode}}
+ +!!! Warning "Use your own VSC account id" + + Replace {{userid}} with your VSC account id (see ) + +!!! Tip + + You can also still use the web portal (see [shell access on web portal](web_portal.md#shell-access)) + +{%- else %} +{%- if OS == windows %} it is recommended to use the web portal. +{%- else %} it should be easy to make use of the `ssh` command in a terminal, but the web portal will work too. {%- endif %} + +The [web portal](web_portal.md) offers a convenient way to upload files and gain shell access to the {{hpcinfra}} from a standard web browser (no software installation or configuration required). + +See [shell access](web_portal.md#shell-access) when using the web portal, or +[connection to the {{hpcinfra}}](connecting.md#first-time-connection-to-the-hpc-infrastructure) when using a terminal. + +Make sure you can get shell access to the {{hpcinfra}} before proceeding with the next steps. + +{%- endif %} + +!!! Info + + When having problems see the [connection issues section on the troubleshooting page](troubleshooting.md#sec:connecting-issues). + + +### Transfer your files + +Now that you can log in, it is time to transfer files from your local computer to your **home directory** on the {{hpcinfra}}. + +Download [tensorflow_mnist.py](https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/tensorflow_mnist.py) +and [run.sh](https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/run.sh) example scripts to your computer (from [here](https://github.com/hpcugent/vsc_user_docs/tree/main/{{exampleloc}})). + +{%- if OS == windows %} + +The [HPC-UGent web portal](https://login.hpc.ugent.be) provides a file browser that allows uploading files. +For more information see the [file browser section](web_portal.md#file-browser). + +Upload both files (`run.sh` and `tensorflow_mnist.py`) to your **home directory** and go back to your shell. + +!!! 
Info + + As an alternative, you can use WinSCP (see [our section](connecting.md#winscp)) + +{%- else %} + +On your local machine you can run: +
curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/tensorflow_mnist.py
+curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/run.sh
+
+ +Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC). +
scp tensorflow_mnist.py run.sh {{userid}}@{{ loginnode }}:~ 
+
ssh  {{userid}}@{{ loginnode }} 
+ +!!! Warning "Use your own VSC account id" + + Replace {{userid}} with your VSC account id (see ) + +!!! Info + + For more information about transferring files or `scp`, see [transfer files from/to hpc](connecting.md#transfer-files-tofrom-the-hpc). + +{%- endif %} + +When running `ls` in your session on the {{hpcinfra}}, you should see the two files listed in your home directory (`~`): + +```shell +$ ls ~ +run.sh tensorflow_mnist.py +``` + +When you do not see these files, make sure you uploaded the files to your **home directory**. + +### Submitting a job + +Jobs are submitted and executed using job scripts. In our case **run.sh** can be used as a (very minimal) job script. + +A job script is a shell script, a text file that specifies the resources, +the software that is used (via `module load` statements), +and the steps that should be executed to run the calculation. + +Our job script looks like this: + +
-- run.sh --
+ +```bash +#!/bin/bash + +module load TensorFlow/2.11.0-foss-2022a + +python tensorflow_mnist.py + +``` +As you can see this job script will run the Python script named **tensorflow_mnist.py**. + + +The jobs you submit are per default executed on **cluster/{{defaultcluster}}**, you can swap to another cluster by issuing the following command. + +```shell +module swap cluster/{{othercluster}} +``` + +!!! Tip + + When submitting jobs with limited amount of resources, it is recommended to use the [debug/interactive cluster](interactive_debug.md#interactive-and-debug-cluster): `donphan`. + +{%- if site == 'Gent' %} + + To get a list of all clusters and their hardware, see . + +{%- endif %} + +This job script can now be submitted to the cluster's job system for execution, using the qsub (**q**ueue **sub**mit) command: + +```shell +$ qsub run.sh +{{jobid}} +``` + +This command returns a job identifier (*{{jobid}}*) on the HPC cluster. This is a unique identifier for the job which can be used to monitor and manage your job. + +!!! Warning "Make sure you understand what the `module` command does" + + Note that the module commands only modify environment variables. For instance, running `module swap cluster/{{othercluster}}` will update your shell environment so that `qsub` submits a job to the `{{othercluster}}` cluster, + but our active shell session is still running on the login node. + + It is important to understand that while `module` commands affect your session environment, they do ***not*** change where the commands you are running are being executed: they will still be run on the login node you are on. + + When you submit a job script however, the commands ***in*** the job script will be run on a workernode of the cluster the job was submitted to (like `{{othercluster}}`). + +For detailed information about `module` commands, read the [running batch jobs](running_batch_jobs.md) chapter. 
+ +### Wait for job to be executed + +Your job is put into a queue before being executed, so it may take a while before it actually starts. +(see [when will my job start?](running_batch_jobs.md#when-will-my-job-start) for scheduling policy). + +You can get an overview of the active jobs using the `qstat` command: +
$ qstat
+Job ID     Name             User            Time Use S Queue
+---------- ---------------- --------------- -------- - -------
+{{jobid}}     run.sh           {{userid}}        0:00:00  Q {{othercluster}}
+
+ +Eventually, after entering `qstat` again you should see that your job has started running: +
$ qstat
+Job ID     Name             User            Time Use S Queue
+---------- ---------------- --------------- -------- - -------
+{{jobid}}     run.sh           {{userid}}        0:00:01  R {{othercluster}}
+
+ +If you don't see your job in the output of the `qstat` command anymore, your job has likely completed. + +Read [this section](running_batch_jobs.md#monitoring-and-managing-your-jobs) on how to interpret the output. + +### Inspect your results + +When your job finishes it generates 2 output files: + +- One for normal output messages (*stdout* output channel). +- One for warning and error messages (*stderr* output channel). + +By default located in the directory where you issued `qsub`. + +{%- if site == 'Gent' %} + +!!! Info + + For more information about the stdout and stderr output channels, see this [section](linux-tutorial/beyond_the_basics.md#inputoutput). + +{%- endif %} + +In our example when running ls in the current directory you should see 2 new files: + +- **run.sh.o{{jobid}}**, containing *normal output messages* produced by job {{jobid}}; +- **run.sh.e{{jobid}}**, containing *errors and warnings* produced by job {{jobid}}. + +!!! Info + + run.sh.e{{jobid}} should be empty (no errors or warnings). + +!!! Warning "Use your own job ID" + + Replace {{jobid}} with the jobid you got from the `qstat` command (see above) or simply look for added files in your current directory by running `ls`. 
+ +When examining the contents of ``run.sh.o{{jobid}}`` you will see something like this: +``` +Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz +11493376/11490434 [==============================] - 1s 0us/step +Epoch 1/5 +1875/1875 [==============================] - 2s 823us/step - loss: 0.2960 - accuracy: 0.9133 +Epoch 2/5 +1875/1875 [==============================] - 1s 771us/step - loss: 0.1427 - accuracy: 0.9571 +Epoch 3/5 +1875/1875 [==============================] - 1s 767us/step - loss: 0.1070 - accuracy: 0.9675 +Epoch 4/5 +1875/1875 [==============================] - 1s 764us/step - loss: 0.0881 - accuracy: 0.9727 +Epoch 5/5 +1875/1875 [==============================] - 1s 764us/step - loss: 0.0741 - accuracy: 0.9768 +313/313 - 0s - loss: 0.0782 - accuracy: 0.9764 +``` + +Hurray 🎉, we trained a deep learning model and achieved 97.64 percent accuracy. + +!!! Warning + + When using TensorFlow specifically, you should actually submit jobs to a GPU cluster for better performance, see [GPU clusters](gpu.md). + + For the purpose of this example, we are running a very small TensorFlow workload on a CPU-only cluster. 
+ +### Next steps + +- [Running interactive jobs](running_interactive_jobs.md) +- [Running jobs with input/output data](running_jobs_with_input_output_data.md) +- [Multi core jobs/Parallel Computing](multi_core_jobs.md) +- [Interactive and debug cluster](interactive_debug.md#interactive-and-debug-cluster) + +For more examples see [Program examples](program_examples.md) and [Job script examples](jobscript_examples.md) diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt new file mode 100644 index 00000000000..f95191b96f0 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt @@ -0,0 +1,25 @@ + +To get access to the HPC-UGent infrastructure, visit Getting an HPC Account[1]. + +If you have not used Linux before, +now would be a good time to follow our Linux Tutorial[2]. + +#### A typical workflow looks like this: + +1. Connect to the login nodes +2. Transfer your files to the HPC-UGent infrastructure +3. Optional: compile your code and test it +4. Create a job script and submit your job +5. Wait for job to be executed +6. Study the results generated by your jobs, either on the cluster or + after downloading them locally. + +We will walk through an illustrative workload to get you started. In this example, our objective is to train a deep learning model for recognizing hand-written digits (MNIST dataset) using TensorFlow[3]; +see the example scripts[4]. 
+ +[1]: account.md +[2]: linux-tutorial/index.md +[3]: https://www.tensorflow.org/ +[4]: https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist + +reference: docs.hpc.ugent.be/getting_started_copy/#getting-access diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt new file mode 100644 index 00000000000..94f17ac5070 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt @@ -0,0 +1,19 @@ + +There are two options to connect + +- Using a terminal to connect via SSH (for power users) (see First Time connection to the HPC-UGent infrastructure[1]) +- Using the web portal[2] + +Considering your operating system is **{{OS}}**, + + +!!! Info + + When having problems see the connection issues section on the troubleshooting page[3]. + + +[1]: connecting.md#first-time-connection-to-the-hpc-infrastructure +[2]: web_portal.md +[3]: troubleshooting.md#sec:connecting-issues + +reference: docs.hpc.ugent.be/getting_started_copy/#getting-connected diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt new file mode 100644 index 00000000000..3403b57f2c2 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt @@ -0,0 +1,11 @@ + +Welcome to the "Getting Started" guide. This chapter will lead you through the initial steps of logging into the HPC-UGent infrastructure and submitting your very first job. 
We'll also walk you through the process step by step using a practical example. + +In addition to this chapter, you might find the recording of the *Introduction to HPC-UGent* training session[1] to be a useful resource. + +Before proceeding, read the introduction to HPC[2] to gain an understanding of the HPC-UGent infrastructure and related terminology. + +[1]: https://www.ugent.be/hpc/en/training/introhpcugent-recording +[2]: introduction.md + +reference: docs.hpc.ugent.be/getting_started_copy/#getting-started diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt new file mode 100644 index 00000000000..417416007f5 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt @@ -0,0 +1,56 @@ + +When your job finishes it generates 2 output files: + +- One for normal output messages (*stdout* output channel). +- One for warning and error messages (*stderr* output channel). + +By default located in the directory where you issued `qsub`. + + +!!! Info + + For more information about the stdout and stderr output channels, see this section[1]. + + +In our example when running ls in the current directory you should see 2 new files: + +- **run.sh.o{{jobid}}**, containing *normal output messages* produced by job {{jobid}}; +- **run.sh.e{{jobid}}**, containing *errors and warnings* produced by job {{jobid}}. + +!!! Info + + run.sh.e{{jobid}} should be empty (no errors or warnings). + +!!! Warning "Use your own job ID" + + Replace {{jobid}} with the jobid you got from the `qstat` command (see above) or simply look for added files in your current directory by running `ls`. 
+ +When examining the contents of ``run.sh.o{{jobid}}`` you will see something like this: +``` +Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz +11493376/11490434 [==============================] - 1s 0us/step +Epoch 1/5 +1875/1875 [==============================] - 2s 823us/step - loss: 0.2960 - accuracy: 0.9133 +Epoch 2/5 +1875/1875 [==============================] - 1s 771us/step - loss: 0.1427 - accuracy: 0.9571 +Epoch 3/5 +1875/1875 [==============================] - 1s 767us/step - loss: 0.1070 - accuracy: 0.9675 +Epoch 4/5 +1875/1875 [==============================] - 1s 764us/step - loss: 0.0881 - accuracy: 0.9727 +Epoch 5/5 +1875/1875 [==============================] - 1s 764us/step - loss: 0.0741 - accuracy: 0.9768 +313/313 - 0s - loss: 0.0782 - accuracy: 0.9764 +``` + +Hurray 🎉, we trained a deep learning model and achieved 97.64 percent accuracy. + +!!! Warning + + When using TensorFlow specifically, you should actually submit jobs to a GPU cluster for better performance, see GPU clusters[2]. + + For the purpose of this example, we are running a very small TensorFlow workload on a CPU-only cluster. 
+ +[1]: linux-tutorial/beyond_the_basics.md#inputoutput +[2]: gpu.md + +reference: docs.hpc.ugent.be/getting_started_copy/#inspect-your-results diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt new file mode 100644 index 00000000000..804b56b8251 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt @@ -0,0 +1,15 @@ + +- Running interactive jobs[1] +- Running jobs with input/output data[2] +- Multi core jobs/Parallel Computing[3] +- Interactive and debug cluster[4] + +For more examples see Program examples[5] and Job script examples[6] +[1]: running_interactive_jobs.md +[2]: running_jobs_with_input_output_data.md +[3]: multi_core_jobs.md +[4]: interactive_debug.md#interactive-and-debug-cluster +[5]: program_examples.md +[6]: jobscript_examples.md + +reference: docs.hpc.ugent.be/getting_started_copy/#next-steps diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt new file mode 100644 index 00000000000..edb336fa06b --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt @@ -0,0 +1,60 @@ + +Jobs are submitted and executed using job scripts. In our case **run.sh** can be used as a (very minimal) job script. + +A job script is a shell script, a text file that specifies the resources, +the software that is used (via `module load` statements), +and the steps that should be executed to run the calculation. + +Our job script looks like this: + +
-- run.sh --
+ +```bash +#!/bin/bash + +module load TensorFlow/2.11.0-foss-2022a + +python tensorflow_mnist.py + +``` +As you can see this job script will run the Python script named **tensorflow_mnist.py**. + + +The jobs you submit are per default executed on **cluster/{{defaultcluster}}**, you can swap to another cluster by issuing the following command. + +```shell +module swap cluster/{{othercluster}} +``` + +!!! Tip + + When submitting jobs with limited amount of resources, it is recommended to use the debug/interactive cluster[1]: `donphan`. + + + To get a list of all clusters and their hardware, see . + + +This job script can now be submitted to the cluster's job system for execution, using the qsub (**q**ueue **sub**mit) command: + +```shell +$ qsub run.sh +{{jobid}} +``` + +This command returns a job identifier (*{{jobid}}*) on the HPC cluster. This is a unique identifier for the job which can be used to monitor and manage your job. + +!!! Warning "Make sure you understand what the `module` command does" + + Note that the module commands only modify environment variables. For instance, running `module swap cluster/{{othercluster}}` will update your shell environment so that `qsub` submits a job to the `{{othercluster}}` cluster, + but our active shell session is still running on the login node. + + It is important to understand that while `module` commands affect your session environment, they do ***not*** change where the commands you are running are being executed: they will still be run on the login node you are on. + + When you submit a job script however, the commands ***in*** the job script will be run on a workernode of the cluster the job was submitted to (like `{{othercluster}}`). + +For detailed information about `module` commands, read the running batch jobs[2] chapter. 
+ +[1]: interactive_debug.md#interactive-and-debug-cluster +[2]: running_batch_jobs.md + +reference: docs.hpc.ugent.be/getting_started_copy/#submitting-a-job diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt new file mode 100644 index 00000000000..94dc30f6712 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt @@ -0,0 +1,21 @@ + +Now that you can login, it is time to transfer files from your local computer to your **home directory** on the HPC-UGent infrastructure. + +Download tensorflow_mnist.py[1] +and run.sh[2] example scripts to your computer (from here[3]). + + +When running `ls` in your session on the HPC-UGent infrastructure, you should see the two files listed in your home directory (`~`): + +```shell +$ ls ~ +run.sh tensorflow_mnist.py +``` + +When you do not see these files, make sure you uploaded the files to your **home directory**. 
+ +[1]: https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py +[2]: https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh +[3]: https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist + +reference: docs.hpc.ugent.be/getting_started_copy/#transfer-your-files diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt new file mode 100644 index 00000000000..de177946cf9 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt @@ -0,0 +1,26 @@ + +Your job is put into a queue before being executed, so it may take a while before it actually starts. +(see when will my job start?[1] for scheduling policy). + +You can get an overview of the active jobs using the `qstat` command: +
$ qstat
+Job ID     Name             User            Time Use S Queue
+---------- ---------------- --------------- -------- - -------
+{{jobid}}     run.sh           {{userid}}        0:00:00  Q {{othercluster}}
+
+ +Eventually, after entering `qstat` again you should see that your job has started running: +
$ qstat
+Job ID     Name             User            Time Use S Queue
+---------- ---------------- --------------- -------- - -------
+{{jobid}}     run.sh           {{userid}}        0:00:01  R {{othercluster}}
+
+ +If you don't see your job in the output of the `qstat` command anymore, your job has likely completed. + +Read this section[2] on how to interpret the output. + +[1]: running_batch_jobs.md#when-will-my-job-start +[2]: running_batch_jobs.md#monitoring-and-managing-your-jobs + +reference: docs.hpc.ugent.be/getting_started_copy/#wait-for-job-to-be-executed diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt new file mode 100644 index 00000000000..e756b9a3cbe --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-access diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt new file mode 100644 index 00000000000..bac5dfcbfbe --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt @@ -0,0 +1,18 @@ +it is recommended to make use of the `ssh` command in a terminal to get the most flexibility. + +Assuming you have already generated SSH keys in the previous step (Getting Access[1]), and that they are in a default location, you should now be able to login by running the following command: + +
ssh {{userid}}@{{loginnode}}
+ +!!! Warning "User your own VSC account id" + + Replace {{userid}} with your VSC account id (see ) + +!!! Tip + + You can also still use the web portal (see shell access on web portal[2]) + +[1]: #getting-access +[2]: web_portal.md#shell-access + +reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-connected diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt new file mode 100644 index 00000000000..f0b9d83bed3 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-started diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt new file mode 100644 index 00000000000..441b54c7042 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Linux/getting_started_copy/#inspect-your-results diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt new file mode 100644 index 00000000000..d72ffccf01a --- /dev/null +++ b/scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Linux/getting_started_copy/#next-steps diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt new file mode 100644 index 00000000000..744c2c3db7a --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Linux/getting_started_copy/#submitting-a-job diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt new file mode 100644 index 00000000000..aca6e05d28c --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt @@ -0,0 +1,21 @@ + +On your local machine you can run: +
curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py
+curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh
+
+ +Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC). +
scp tensorflow_mnist.py run.sh {{userid}}{{ loginnode }}:~ 
+
ssh  {{userid}}@{{ loginnode }} 
+ +!!! Warning "User your own VSC account id" + + Replace {{userid}} with your VSC account id (see ) + +!!! Info + + For more information about transfering files or `scp`, see tranfer files from/to hpc[1]. + +[1]: connecting.md#transfer-files-tofrom-the-hpc + +reference: docs.hpc.ugent.be/Linux/getting_started_copy/#transfer-your-files diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt new file mode 100644 index 00000000000..93e6fdff171 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Linux/getting_started_copy/#wait-for-job-to-be-executed diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt new file mode 100644 index 00000000000..8732e586981 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-access diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt new file mode 100644 index 00000000000..2b1de2be838 --- /dev/null 
+++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt @@ -0,0 +1,13 @@ +it should be easy to make use of the `ssh` command in a terminal, but the web portal will work too. +The web portal[1] offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). + +See shell access[2] when using the web portal, or +connection to the HPC-UGent infrastructure[3] when using a terminal. + +Make sure you can get to a shell access to the HPC-UGent infrastructure before proceeding with the next steps. + +[1]: web_portal.md +[2]: web_portal.md#shell-access +[3]: connecting.md#first-time-connection-to-the-hpc-infrastructure + +reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-connected diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt new file mode 100644 index 00000000000..4e60f862a0a --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-started diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt new file mode 100644 index 00000000000..f7ae9f96226 --- /dev/null +++ b/scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/macOS/getting_started_copy/#inspect-your-results diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt new file mode 100644 index 00000000000..71f384bcf17 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/macOS/getting_started_copy/#next-steps diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt new file mode 100644 index 00000000000..d72ba48195a --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/macOS/getting_started_copy/#submitting-a-job diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt new file mode 100644 index 00000000000..fce05042ab2 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt @@ -0,0 +1,21 @@ + +On your local machine you can 
run: +
curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py
+curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh
+
+ +Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC). +
scp tensorflow_mnist.py run.sh {{userid}}{{ loginnode }}:~ 
+
ssh  {{userid}}@{{ loginnode }} 
+ +!!! Warning "User your own VSC account id" + + Replace {{userid}} with your VSC account id (see ) + +!!! Info + + For more information about transfering files or `scp`, see tranfer files from/to hpc[1]. + +[1]: connecting.md#transfer-files-tofrom-the-hpc + +reference: docs.hpc.ugent.be/macOS/getting_started_copy/#transfer-your-files diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt new file mode 100644 index 00000000000..2ef8770504b --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/macOS/getting_started_copy/#wait-for-job-to-be-executed diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt new file mode 100644 index 00000000000..874af365704 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-access diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt new file mode 100644 index 00000000000..ce0b873b2b0 --- 
/dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt @@ -0,0 +1,13 @@ +it is recommended to use the web portal. +The web portal[1] offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). + +See shell access[2] when using the web portal, or +connection to the HPC-UGent infrastructure[3] when using a terminal. + +Make sure you can get to a shell access to the HPC-UGent infrastructure before proceeding with the next steps. + +[1]: web_portal.md +[2]: web_portal.md#shell-access +[3]: connecting.md#first-time-connection-to-the-hpc-infrastructure + +reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-connected diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt new file mode 100644 index 00000000000..44d1f17b73b --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-started diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt new file mode 100644 index 00000000000..730fbbc3b74 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt @@ -0,0 +1,2 @@ + 
+reference: docs.hpc.ugent.be/Windows/getting_started_copy/#inspect-your-results diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt new file mode 100644 index 00000000000..55df915125a --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Windows/getting_started_copy/#next-steps diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt new file mode 100644 index 00000000000..f67d48ece4a --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Windows/getting_started_copy/#submitting-a-job diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt new file mode 100644 index 00000000000..dce86fc7cf3 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt @@ -0,0 +1,15 @@ + +The HPC-UGent web portal[1] provides a file browser that allows uploading files. +For more information see the file browser section[2]. 
+ +Upload both files (`run.sh` and `tensorflow-mnist.py`) to your **home directory** and go back to your shell. + +!!! Info + + As an alternative, you can use WinSCP (see our section[3]) + +[1]: https://login.hpc.ugent.be +[2]: web_portal.md#file-browser +[3]: connecting.md#winscp + +reference: docs.hpc.ugent.be/Windows/getting_started_copy/#transfer-your-files diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt new file mode 100644 index 00000000000..bdd7387e379 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Windows/getting_started_copy/#wait-for-job-to-be-executed diff --git a/scripts/HPC chatbot preprocessor/jinja_parser.py b/scripts/HPC chatbot preprocessor/jinja_parser.py new file mode 100644 index 00000000000..d9fd8f1c5ce --- /dev/null +++ b/scripts/HPC chatbot preprocessor/jinja_parser.py @@ -0,0 +1,3 @@ +from jinja2 import Template + +# I shall do this tomorrow, I do not find myself in the possession of enough understanding about jinja to commence with this task today diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index 7f74fb12fb2..0331e61cfc5 100644 --- a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -22,9 +22,13 @@ # Test with actual document +# make a copies directory to store the copies +if not os.path.exists(".\\copies"): + os.mkdir(".\\copies") + # make a copy of one of the md files to test some things 
-shutil.copyfile("C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC\\getting_started.md", - "C:\\HPC_werk\\Chatbot\\getting_started_copy.md") +shutil.copyfile("..\\..\\mkdocs\\docs\\HPC\\getting_started.md", + ".\\copies\\getting_started_copy.md") ################### define global variables ################### # variable for the filename (which will be changed into something else in the final version) @@ -34,10 +38,10 @@ main_title = filename[:-3] # variable that keeps track of the directories that are used to write in at different levels -root_dir_generic = "C:\\HPC_werk\\Chatbot\\parsed_mds\\generic\\" -root_dir_os_specific_linux = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\linux\\" -root_dir_os_specific_windows = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\windows\\" -root_dir_os_specific_macos = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\macos\\" +root_dir_generic = ".\\copies\\parsed_mds\\generic\\" +root_dir_os_specific_linux = ".\\copies\\parsed_mds\\os_specific\\linux\\" +root_dir_os_specific_windows = ".\\copies\\parsed_mds\\os_specific\\windows\\" +root_dir_os_specific_macos = ".\\copies\\parsed_mds\\os_specific\\macos\\" curr_dirs = [filename[:-3] for i in range(4)] # variable to keep track whether we're dealing with OS-specific info or not @@ -162,6 +166,12 @@ def replace_markdown_markers(curr_line, linklist): curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + str(len(linklist) + 1) + "]") linklist.append(match[1]) + # TODO: + # code-blocks + # tips + # warnings + # etc + return curr_line, linklist @@ -331,7 +341,9 @@ def write_end_of_file(file_location, OS, linklist): remove_directory_tree(root_dir_os_specific_macos) # create directories for the source markdown file +create_directory(".\\copies\\parsed_mds") create_directory(root_dir_generic) +create_directory(".\\copies\\parsed_mds\\os_specific") create_directory(root_dir_os_specific_linux) create_directory(root_dir_os_specific_windows) 
create_directory(root_dir_os_specific_macos) @@ -341,7 +353,7 @@ def write_end_of_file(file_location, OS, linklist): create_directory(root_dir_os_specific_macos + curr_dirs[0]) # open the file and store line by line in the right file -with open("C:\\HPC_werk\\Chatbot\\getting_started_copy.md", 'r') as readfile: +with open(".\\copies\\" + filename, 'r') as readfile: for line in readfile: title_level, title, directory = check_for_title(line) diff --git a/scripts/HPC chatbot preprocessor/start_checker.py b/scripts/HPC chatbot preprocessor/start_checker.py index 50b61cd5213..5661c79ddc9 100644 --- a/scripts/HPC chatbot preprocessor/start_checker.py +++ b/scripts/HPC chatbot preprocessor/start_checker.py @@ -1,3 +1,5 @@ +# THIS IS NOT AN IMPORTANT FILE, DON'T WORRY ABOUT IT, I JUST USED IT TO TEST SOME THINGS + import os directory = "C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC" From 85a93ec31da14d3877658a741fd882184695b6cb Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 9 Aug 2024 10:39:56 +0200 Subject: [PATCH 003/152] used jinja to replace macros --- .../HPC chatbot preprocessor/if_mangler.py | 47 +++++++ .../HPC chatbot preprocessor/jinja_parser.py | 24 +++- scripts/HPC chatbot preprocessor/main.py | 124 ++++++------------ 3 files changed, 109 insertions(+), 86 deletions(-) create mode 100644 scripts/HPC chatbot preprocessor/if_mangler.py diff --git a/scripts/HPC chatbot preprocessor/if_mangler.py b/scripts/HPC chatbot preprocessor/if_mangler.py new file mode 100644 index 00000000000..f49ef691fd3 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/if_mangler.py @@ -0,0 +1,47 @@ +import re +import os + + +def create_directory(new_directory): + if not os.path.exists(new_directory): + os.mkdir(new_directory) + +create_directory(".\\if_mangled_files") + +# global variable to keep track of latest if-statement scope +is_os = False + + +def mangle_os_ifs(line): + global is_os + + match = re.search(r'\{%-\s[^%]*%}', line) + if_match = 
re.search(r'\{%-\sif [^%]*%}', line) + if_os_match = re.search(r'\{%-\sif OS == [^%]*%}', line) + + if match: + if if_match: + if if_os_match: + is_os = True + line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:] + else: + is_os = False + else: + if is_os: + line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:] + + match = re.search(r'\{%-\s[^%]*%}', line) + + while match and is_os: + line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:] + match = re.search(r'\{%-\s[^%]*%}', line) + + return line + + +def mangle_ifs(directory, file): + with open(".\\if_mangled_files\\" + file, 'w') as write_file: + with open(directory + "\\" + file, 'r') as read_file: + for line in read_file: + new_line = mangle_os_ifs(line) + write_file.write(new_line) diff --git a/scripts/HPC chatbot preprocessor/jinja_parser.py b/scripts/HPC chatbot preprocessor/jinja_parser.py index d9fd8f1c5ce..1b033bfdc6c 100644 --- a/scripts/HPC chatbot preprocessor/jinja_parser.py +++ b/scripts/HPC chatbot preprocessor/jinja_parser.py @@ -1,3 +1,25 @@ +import yaml from jinja2 import Template +from if_mangler import mangle_ifs -# I shall do this tomorrow, I do not find myself in the possession of enough understanding about jinja to commence with this task today + +# function that let's jinja do its thing to format the files expect for the os-related if-statements +def jinja_parser(filename): + # Read the YAML file + with open('..\\..\\mkdocs\\extra\\gent.yml', 'r') as yml_file: + words_dict = yaml.safe_load(yml_file) + + # Mangle the OS-related if-statements + mangle_ifs('.\\copies', filename) + + # Read the if-mangled Markdown file + with open('.\\if_mangled_files\\' + filename, 'r') as md_file: + md_content = md_file.read() + + # Use Jinja2 to replace the macros + template = Template(md_content) + rendered_content = 
template.render(words_dict) + + # Save the rendered content to a new file + with open('.\\copies\\' + filename, 'w') as output_file: + output_file.write(rendered_content) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index 0331e61cfc5..35769de46ab 100644 --- a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -1,6 +1,7 @@ import os import re import shutil +from jinja_parser import jinja_parser # test_number = int(input("Which test should be run?")) # @@ -26,6 +27,9 @@ if not os.path.exists(".\\copies"): os.mkdir(".\\copies") +if not os.path.exists(".\\parsed_mds"): + os.mkdir(".\\parsed_mds") + # make a copy of one of the md files to test some things shutil.copyfile("..\\..\\mkdocs\\docs\\HPC\\getting_started.md", ".\\copies\\getting_started_copy.md") @@ -38,10 +42,10 @@ main_title = filename[:-3] # variable that keeps track of the directories that are used to write in at different levels -root_dir_generic = ".\\copies\\parsed_mds\\generic\\" -root_dir_os_specific_linux = ".\\copies\\parsed_mds\\os_specific\\linux\\" -root_dir_os_specific_windows = ".\\copies\\parsed_mds\\os_specific\\windows\\" -root_dir_os_specific_macos = ".\\copies\\parsed_mds\\os_specific\\macos\\" +root_dir_generic = ".\\parsed_mds\\generic\\" +root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" +root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" +root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" curr_dirs = [filename[:-3] for i in range(4)] # variable to keep track whether we're dealing with OS-specific info or not @@ -62,17 +66,10 @@ links_windows = [] links_macos = [] -# dictionaries to keep track of current OS and location +# dictionaries to keep track of current OS active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} -active_site_if_states = {"Gent": "inactive", "not-Gent": "inactive"} - -# variable to keep 
track of the type of if-statement -if_type = "OS" - -# variable to keep track of the macro-replacements at the top of markdown files -replacements = {} -# variable that is used to detect whether the first title has been encountered yet +# variable that shows whether the first title has been reached yet after_first_title = False @@ -84,13 +81,6 @@ def remove_directory_tree(old_directory): shutil.rmtree(old_directory) -# function that checks the first lines of a file until a title is found and saves the macro-replacements to the list -def save_replacements(curr_line): - global replacements - match = re.search(r'\{% set (.*?)="(.*?)" %}', curr_line) - replacements[match.group(1)] = match.group(2) - - # function that checks whether the current line has a title of level 3 at maximum (returns the level of the title or 0 if the line is not a title) def check_for_title_logic(curr_line): global curr_dirs @@ -151,13 +141,6 @@ def update_lower_curr_dir(curr_directory, level): # function that replaces certain markdown structures with the equivalent used on the website def replace_markdown_markers(curr_line, linklist): - # replace {{hpcinfra}} - curr_line = re.sub(r'\{\{\s*hpcinfra\s*}}', "HPC-UGent infrastructure", curr_line) - - # replace other replacement macros - for macro in replacements.keys(): - curr_line = re.sub(r'\{\{\s*' + re.escape(macro) + r'\s*}}', replacements[macro], curr_line) - # replace links with a reference matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) if matches: @@ -177,13 +160,12 @@ def replace_markdown_markers(curr_line, linklist): # function that checks for if-statements def check_if_statements(curr_line): - global if_type # check whether the first part of the line contains information wrt if-statements - match = re.search(r'^\{%-\s([^%]*)%}(.*)', curr_line) + match = re.search(r'^\{-if-%-\s([^%]*)%-if-}(.*)', curr_line) # check whether the line contains information wrt if-statements that is not in its first part - match_large = 
re.search(r'^(.*)(\{%-\s[^%]*%})(.*)', curr_line) + match_large = re.search(r'^(.*)(\{-if-%-\s[^%]*%-if-})(.*)', curr_line) if match: print("################################################################################") @@ -202,34 +184,16 @@ def check_if_statements(curr_line): if other_OS != OS and active_OS_if_states[other_OS] == "active": active_OS_if_states[other_OS] = "inactive" - if_type = "OS" - - # new if-statement wrt site - elif re.match(r'if site == ', content): - if re.search(r'(?i)gent', content): - active_site_if_states["Gent"] = "active" - active_site_if_states["not-Gent"] = "inactive" - else: - active_site_if_states["not-Gent"] = "active" - if active_site_if_states["Gent"] == "active": - active_site_if_states["Gent"] = "inactive" - if_type = "site" - # endif statement wrt OS - elif re.match(r'endif ', content) and if_type == "OS": + elif re.match(r'endif ', content): if str(1) in active_OS_if_states.values(): active_OS_if_states[list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active" else: for key in active_OS_if_states.keys(): active_OS_if_states[key] = "inactive" - # endif statement wrt site - elif re.match(r'endif ', content) and if_type == "site": - for key in active_site_if_states.keys(): - active_site_if_states[key] = "inactive" - # else statement wrt OS - elif re.match(r'else ', content) and if_type == "OS": + elif re.match(r'else ', content): i = 0 for i in range(3): @@ -246,18 +210,7 @@ def check_if_statements(curr_line): position = list(active_OS_if_states.values()).index("inactive") active_OS_if_states[key_list[position]] = "active" - # else statement wrt site - elif re.match(r'else ', content) and if_type == "site": - - # change state of "Gent" and set not-Gent on active - if active_site_if_states["Gent"] == "inactive": - active_site_if_states["Gent"] = "active" - elif active_site_if_states["Gent"] == "active": - active_site_if_states["Gent"] = str(0) - active_site_if_states["not-Gent"] = 
"active" - print(active_OS_if_states) - print(active_site_if_states) if len(match.group(2)) != 0: extra_message = match.group(2).lstrip() @@ -273,7 +226,6 @@ def check_if_statements(curr_line): elif match_large: print("################################################################################") print(active_OS_if_states) - print(active_site_if_states) print(match_large.group(1)) print(match_large.group(2)) print("write_text_and_check_extra_message") @@ -301,15 +253,14 @@ def write_text_to_file(file_name, curr_line): # function that decides what file to write text to def choose_and_write_to_file(curr_line): # check that the line is part of the website for gent - if active_site_if_states["Gent"] == "active" or active_site_if_states["Gent"] == "inactive" and active_site_if_states["not-Gent"] == "inactive": - if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive": - write_text_to_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", curr_line) - if active_OS_if_states["linux"] == "active": - write_text_to_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", curr_line) - if active_OS_if_states["windows"] == "active": - write_text_to_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", curr_line) - if active_OS_if_states["macos"] == "active": - write_text_to_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", curr_line) + if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive": + write_text_to_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", curr_line) + if active_OS_if_states["linux"] == "active": + write_text_to_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", curr_line) + if active_OS_if_states["windows"] == "active": + 
write_text_to_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", curr_line) + if active_OS_if_states["macos"] == "active": + write_text_to_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", curr_line) # function that adds a reference link at the end of every txt file @@ -325,6 +276,7 @@ def write_end_of_file(file_location, OS, linklist): # add the links from within the document with open(file_location, 'a') as write_file: + write_file.write("\n\n") for i, link in enumerate(linklist): write_file.write("[" + str(i + 1) + "]: " + str(link) + "\n") @@ -341,9 +293,8 @@ def write_end_of_file(file_location, OS, linklist): remove_directory_tree(root_dir_os_specific_macos) # create directories for the source markdown file -create_directory(".\\copies\\parsed_mds") create_directory(root_dir_generic) -create_directory(".\\copies\\parsed_mds\\os_specific") +create_directory(".\\parsed_mds\\os_specific") create_directory(root_dir_os_specific_linux) create_directory(root_dir_os_specific_windows) create_directory(root_dir_os_specific_macos) @@ -352,6 +303,9 @@ def write_end_of_file(file_location, OS, linklist): create_directory(root_dir_os_specific_windows + curr_dirs[0]) create_directory(root_dir_os_specific_macos + curr_dirs[0]) +# process the jinja macros +jinja_parser(filename) + # open the file and store line by line in the right file with open(".\\copies\\" + filename, 'r') as readfile: @@ -366,22 +320,22 @@ def write_end_of_file(file_location, OS, linklist): after_first_title = True # line is not a title - else: - if after_first_title: - # check for if-statements and write the appropriate lines in the right files - next_action = check_if_statements(line) - while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": - if next_action[0] == "write_text_and_check_extra_message": - choose_and_write_to_file(next_action[2]) - next_action = check_if_statements(next_action[1]) - - 
if next_action[0] == "write_text": + elif after_first_title: + # check for if-statements and write the appropriate lines in the right files + next_action = check_if_statements(line) + while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": + if next_action[0] == "write_text_and_check_extra_message": choose_and_write_to_file(next_action[2]) - else: - save_replacements(line) + next_action = check_if_statements(next_action[1]) + + if next_action[0] == "write_text": + choose_and_write_to_file(next_action[2]) # write end of file for the last file write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux) write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", links_windows) write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos) + + +# TODO: directory cleanup From dfff5fabae20307d13cef4f80d22943f7eac87f1 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 9 Aug 2024 14:34:01 +0200 Subject: [PATCH 004/152] adapt if-mangler to accommodate for nested if-clauses --- .../HPC chatbot preprocessor/if_mangler.py | 64 +++++++++++-------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/if_mangler.py b/scripts/HPC chatbot preprocessor/if_mangler.py index f49ef691fd3..9980a2e83e8 100644 --- a/scripts/HPC chatbot preprocessor/if_mangler.py +++ b/scripts/HPC chatbot preprocessor/if_mangler.py @@ -1,41 +1,53 @@ import re -import os - - -def create_directory(new_directory): - if not os.path.exists(new_directory): - os.mkdir(new_directory) - -create_directory(".\\if_mangled_files") # global variable to keep track of latest if-statement scope -is_os = False +is_os = 0 # Can be 0, 1 or 2 {0: not in an os-if; 1: in a non-os-if nested in 
an os-if; 2: in an os-if} def mangle_os_ifs(line): global is_os - match = re.search(r'\{%-\s[^%]*%}', line) - if_match = re.search(r'\{%-\sif [^%]*%}', line) - if_os_match = re.search(r'\{%-\sif OS == [^%]*%}', line) + match = re.search(r'\{%(.*?)%}(.*)', line) - if match: - if if_match: - if if_os_match: - is_os = True - line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:] - else: - is_os = False - else: - if is_os: - line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:] + start_index = 0 + added_length = 0 - match = re.search(r'\{%-\s[^%]*%}', line) + while match: - while match and is_os: - line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:] - match = re.search(r'\{%-\s[^%]*%}', line) + constr_match = re.search(r'\{%.*?%}', match.string) + if_match = re.search(r'if ', match.group(1)) + if_os_match = re.search(r'if OS == ', match.group(1)) + endif_match = re.search(r'endif', match.group(1)) + if endif_match: + if is_os == 2: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + is_os = 0 + elif is_os == 1: + is_os = 2 + elif if_match: + if if_os_match: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + is_os = 2 + else: + if is_os == 2: + is_os = 1 + else: + is_os = 0 + else: + if is_os == 2: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + 
start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + start_index += constr_match.end() + match = re.search(r'\{%(.*?)%}(.*)', match.group(2)) return line From 649ddec3fcad3655445aa930f75bf4dd82a9504f Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 9 Aug 2024 14:34:43 +0200 Subject: [PATCH 005/152] adapt the parser to take all files as input, not all files get parsed successfully yet --- scripts/HPC chatbot preprocessor/main.py | 282 ++++++++++++----------- 1 file changed, 150 insertions(+), 132 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index 35769de46ab..86bd2ed9c3f 100644 --- a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -3,78 +3,48 @@ import shutil from jinja_parser import jinja_parser -# test_number = int(input("Which test should be run?")) -# -# # Test for strip_markdown (somewhat successful, see findings file) -# -# if test_number == 1: -# import strip_markdown -# -# strip_markdown.strip_markdown_file("C:\\HPC werk\\Chatbot\\md_to_plaintext_test.md") -# -# # Test if copy of document doesn't change original document (successful) -# if test_number == 2: -# import shutil -# -# shutil.copyfile("C:\\HPC_werk\\Chatbot\\md_to_plaintext_test.txt", -# "C:\\HPC_werk\\Chatbot\\md_to_plaintext_test_copy.txt") -# with open("C:\\HPC_werk\\Chatbot\\md_to_plaintext_test_copy.txt", 'w') as file: -# file.write('hello') - -# Test with actual document - -# make a copies directory to store the copies +# variables for analytics +succeeded = 0 +failed = 0 + +# make the necessary directories if not os.path.exists(".\\copies"): os.mkdir(".\\copies") if not os.path.exists(".\\parsed_mds"): os.mkdir(".\\parsed_mds") -# make a copy of one of the md files to test some things -shutil.copyfile("..\\..\\mkdocs\\docs\\HPC\\getting_started.md", - 
".\\copies\\getting_started_copy.md") +if not os.path.exists(".\\if_mangled_files"): + os.mkdir(".\\if_mangled_files") + +# copy the examples to the right location wrt the script in order to allow jinja to work +if not os.path.exists(".\\examples"): + shutil.copytree("..\\..\\mkdocs\\docs\\HPC\\examples", ".\\examples") ################### define global variables ################### -# variable for the filename (which will be changed into something else in the final version) -filename = "getting_started_copy.md" -# variable for the main title (needed for reference links) -main_title = filename[:-3] +# variable that keeps track of the source directories +source_directories = ["..\\..\\mkdocs\\docs\\HPC\\", "..\\..\\mkdocs\\docs\\HPC\\linux-tutorial"] # variable that keeps track of the directories that are used to write in at different levels root_dir_generic = ".\\parsed_mds\\generic\\" root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" -curr_dirs = [filename[:-3] for i in range(4)] - -# variable to keep track whether we're dealing with OS-specific info or not -OS_specific = False - -# pattern for the regex if-statement to filter out markdown titles -if_pattern = r'^#+ ' - -# variable that keeps track of the latest non-zero level title and corresponding directory -last_title_level = 1 -last_title = None -last_directory = None -last_was_title = False -# list to keep track of links in the text -links_generic = [] -links_linux = [] -links_windows = [] -links_macos = [] +# list of all the filenames +filenames = {} +for source_directory in source_directories: + all_items = os.listdir(source_directory) + files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + for file in files: + filenames[file] = os.path.join(source_directory, file) -# dictionaries to keep track of 
current OS -active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} -# variable that shows whether the first title has been reached yet -after_first_title = False +# filenames = {'account.md': '..\\..\\mkdocs\\docs\\HPC\\account.md'} ################### define functions ################### - # function that removes the previous file structure before starting the process of making a new one def remove_directory_tree(old_directory): if os.path.exists(old_directory): @@ -84,7 +54,7 @@ def remove_directory_tree(old_directory): # function that checks whether the current line has a title of level 3 at maximum (returns the level of the title or 0 if the line is not a title) def check_for_title_logic(curr_line): global curr_dirs - match = re.match(if_pattern, curr_line) + match = re.match(r'^#+ ', curr_line) if match and len(match.group(0)) <= 4: return len(match.group(0)) - 1 else: @@ -109,12 +79,16 @@ def check_for_title(curr_line): else: if last_title is not None: write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) - write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux) - write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", links_windows) - write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos) + write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", + links_linux) + write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", + links_windows) + write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", + links_macos) reset_link_lists() - curr_dirs[logic_output] = curr_dirs[logic_output - 1] + "\\" + curr_line[logic_output + 1:-1].replace(' ', '-') + curr_dirs[logic_output] = 
curr_dirs[logic_output - 1] + "\\" + make_valid_title( + curr_line[logic_output + 1:-1].replace(' ', '-')) create_directory(root_dir_generic + curr_dirs[logic_output]) create_directory(root_dir_os_specific_linux + curr_dirs[logic_output]) @@ -122,7 +96,7 @@ def check_for_title(curr_line): create_directory(root_dir_os_specific_macos + curr_dirs[logic_output]) update_lower_curr_dir(curr_dirs[logic_output], logic_output) - return logic_output, curr_line[logic_output + 1:-1].replace(' ', '-'), curr_dirs[logic_output] + return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output] # function that creates directories if needed @@ -140,19 +114,16 @@ def update_lower_curr_dir(curr_directory, level): # function that replaces certain markdown structures with the equivalent used on the website def replace_markdown_markers(curr_line, linklist): - # replace links with a reference matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) if matches: for match in matches: - print(f"[{match[0]}]({match[1]})") curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + str(len(linklist) + 1) + "]") linklist.append(match[1]) - # TODO: - # code-blocks - # tips - # warnings + # TODO: code-blocks + # TODO: tips + # TODO: warnings # etc return curr_line, linklist @@ -160,7 +131,7 @@ def replace_markdown_markers(curr_line, linklist): # function that checks for if-statements def check_if_statements(curr_line): - + # TODO: adapt regex for annoying inconsistencies # check whether the first part of the line contains information wrt if-statements match = re.search(r'^\{-if-%-\s([^%]*)%-if-}(.*)', curr_line) @@ -168,9 +139,7 @@ def check_if_statements(curr_line): match_large = re.search(r'^(.*)(\{-if-%-\s[^%]*%-if-})(.*)', curr_line) if match: - print("################################################################################") content = match.group(1) - print(content) # new if-statement wrt OS if re.match(r'if OS == ', 
content): @@ -187,7 +156,8 @@ def check_if_statements(curr_line): # endif statement wrt OS elif re.match(r'endif ', content): if str(1) in active_OS_if_states.values(): - active_OS_if_states[list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active" + active_OS_if_states[ + list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active" else: for key in active_OS_if_states.keys(): active_OS_if_states[key] = "inactive" @@ -210,25 +180,14 @@ def check_if_statements(curr_line): position = list(active_OS_if_states.values()).index("inactive") active_OS_if_states[key_list[position]] = "active" - print(active_OS_if_states) - if len(match.group(2)) != 0: extra_message = match.group(2).lstrip() - print(extra_message) - # check_if_statements(extra_message) - print("check_extra_message") return "check_extra_message", extra_message, None else: - print("done") return "done", None, None elif match_large: - print("################################################################################") - print(active_OS_if_states) - print(match_large.group(1)) - print(match_large.group(2)) - print("write_text_and_check_extra_message") return "write_text_and_check_extra_message", match_large.group(2), match_large.group(1) else: @@ -253,7 +212,8 @@ def write_text_to_file(file_name, curr_line): # function that decides what file to write text to def choose_and_write_to_file(curr_line): # check that the line is part of the website for gent - if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive": + if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and \ + active_OS_if_states["macos"] == "inactive": write_text_to_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", curr_line) if active_OS_if_states["linux"] == "active": write_text_to_file(root_dir_os_specific_linux + last_directory 
+ "\\" + last_title + ".txt", curr_line) @@ -284,58 +244,116 @@ def write_end_of_file(file_location, OS, linklist): add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + main_title + "/#" + last_title.lower()) -################### actually parse the md file ################### - -# remove the old directories if needed -remove_directory_tree(root_dir_generic) -remove_directory_tree(root_dir_os_specific_linux) -remove_directory_tree(root_dir_os_specific_windows) -remove_directory_tree(root_dir_os_specific_macos) - -# create directories for the source markdown file -create_directory(root_dir_generic) -create_directory(".\\parsed_mds\\os_specific") -create_directory(root_dir_os_specific_linux) -create_directory(root_dir_os_specific_windows) -create_directory(root_dir_os_specific_macos) -create_directory(root_dir_generic + curr_dirs[0]) -create_directory(root_dir_os_specific_linux + curr_dirs[0]) -create_directory(root_dir_os_specific_windows + curr_dirs[0]) -create_directory(root_dir_os_specific_macos + curr_dirs[0]) - -# process the jinja macros -jinja_parser(filename) - -# open the file and store line by line in the right file -with open(".\\copies\\" + filename, 'r') as readfile: - - for line in readfile: - title_level, title, directory = check_for_title(line) - - # line is a title with a maximum depth of 3 - if title_level > 0: - last_title_level = title_level - last_title = title - last_directory = directory - after_first_title = True - - # line is not a title - elif after_first_title: - # check for if-statements and write the appropriate lines in the right files - next_action = check_if_statements(line) - while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": - if next_action[0] == "write_text_and_check_extra_message": - choose_and_write_to_file(next_action[2]) - next_action = check_if_statements(next_action[1]) - - if next_action[0] == "write_text": - choose_and_write_to_file(next_action[2]) - -# write 
end of file for the last file -write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) -write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux) -write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", links_windows) -write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos) - +# function that makes sure all titles can be used as valid filenames +def make_valid_title(s): + # Define a regex pattern for invalid characters on both Windows and Linux + invalid_chars = r'[<>:"/\\|?*\0()]' + + # Remove invalid characters + valid_filename = re.sub(invalid_chars, '', s) + + # Strip leading/trailing whitespace + valid_filename = valid_filename.strip() + + return valid_filename + + +for filename in filenames.keys(): + try: + # make a copy of one of the md files to test some things + shutil.copyfile(filenames[filename], + ".\\copies\\" + filename) + + ################### define/reset loop specific variables ################### + + # variable for the main title (needed for reference links) + main_title = filename[:-3] + + # variable that keeps track of the directories that are used to write in at different levels + curr_dirs = [filename[:-3] for i in range(4)] + + # variable to keep track whether we're dealing with OS-specific info or not + OS_specific = False + + # variable that keeps track of the latest non-zero level title and corresponding directory + last_title_level = 1 + last_title = None + last_directory = None + last_was_title = False + + # list to keep track of links in the text + links_generic = [] + links_linux = [] + links_windows = [] + links_macos = [] + + # dictionaries to keep track of current OS + active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} + + # variable that shows whether the first title has been reached yet + 
after_first_title = False + + ################### actually parse the md file ################### + + # remove the old directories if needed + remove_directory_tree(root_dir_generic) + remove_directory_tree(root_dir_os_specific_linux) + remove_directory_tree(root_dir_os_specific_windows) + remove_directory_tree(root_dir_os_specific_macos) + + # create directories for the source markdown file + create_directory(root_dir_generic) + create_directory(".\\parsed_mds\\os_specific") + create_directory(root_dir_os_specific_linux) + create_directory(root_dir_os_specific_windows) + create_directory(root_dir_os_specific_macos) + create_directory(root_dir_generic + curr_dirs[0]) + create_directory(root_dir_os_specific_linux + curr_dirs[0]) + create_directory(root_dir_os_specific_windows + curr_dirs[0]) + create_directory(root_dir_os_specific_macos + curr_dirs[0]) + + # process the jinja macros + jinja_parser(filename) + + # open the file and store line by line in the right file + with open(".\\copies\\" + filename, 'r') as readfile: + + for line in readfile: + title_level, title, directory = check_for_title(line) + + # line is a title with a maximum depth of 3 + if title_level > 0: + last_title_level = title_level + last_title = title + last_directory = directory + after_first_title = True + + # line is not a title + elif after_first_title: + # check for if-statements and write the appropriate lines in the right files + next_action = check_if_statements(line) + while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": + if next_action[0] == "write_text_and_check_extra_message": + choose_and_write_to_file(next_action[2]) + next_action = check_if_statements(next_action[1]) + + if next_action[0] == "write_text": + choose_and_write_to_file(next_action[2]) + + # write end of file for the last file + write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) + 
write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", + links_linux) + write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", + links_windows) + write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", + links_macos) + print("Parsing succeeded for file: " + filename) + succeeded += 1 + except: + print("Parsing failed for file: " + filename) + failed += 1 + +print("Success ratio: " + str(succeeded/(succeeded + failed) * 100) + "%") # TODO: directory cleanup From 2116d6e2412e56c48af0e2f032535f687836859c Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 9 Aug 2024 16:42:44 +0200 Subject: [PATCH 006/152] adapt the parser to take all files as input, not all files get parsed successfully yet --- scripts/HPC chatbot preprocessor/main.py | 38 +++++++++++++++--------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index 86bd2ed9c3f..b2900334ef8 100644 --- a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -17,10 +17,6 @@ if not os.path.exists(".\\if_mangled_files"): os.mkdir(".\\if_mangled_files") -# copy the examples to the right location wrt the script in order to allow jinja to work -if not os.path.exists(".\\examples"): - shutil.copytree("..\\..\\mkdocs\\docs\\HPC\\examples", ".\\examples") - ################### define global variables ################### # variable that keeps track of the source directories @@ -40,8 +36,10 @@ for file in files: filenames[file] = os.path.join(source_directory, file) +# TODO: find solution for duplicate filenames between linux tutorial and normal files -# filenames = {'account.md': '..\\..\\mkdocs\\docs\\HPC\\account.md'} +# TODO: problem-files (other layout than normal markdown-files) +problem_files = ["linux_tutorial\\getting_started.md", 
"linux_tutorial\\navigating.md"] ################### define functions ################### @@ -72,9 +70,9 @@ def reset_link_lists(): # function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables def check_for_title(curr_line): - global curr_dirs, last_title + global curr_dirs, last_title, in_code_block logic_output = check_for_title_logic(curr_line) - if logic_output == 0: + if logic_output == 0 or in_code_block: return 0, None, None else: if last_title is not None: @@ -99,6 +97,13 @@ def check_for_title(curr_line): return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output] +# function used to detect codeblocks and make sure the comments don't get detected as titles +def detect_in_code_block(curr_line): + global in_code_block + if '```' in curr_line or (('
' in curr_line) ^ ('
' in curr_line)): + in_code_block = not in_code_block + + # function that creates directories if needed def create_directory(new_directory): if not os.path.exists(new_directory): @@ -227,6 +232,7 @@ def choose_and_write_to_file(curr_line): def add_reference_link(file_location, reference_link): with open(file_location, 'a') as write_file: write_file.write("\nreference: " + reference_link + "\n") + # TODO: fix trailing spaces in filename # function that adds the links that should be at the end of a file @@ -260,6 +266,7 @@ def make_valid_title(s): for filename in filenames.keys(): try: + # if True: # make a copy of one of the md files to test some things shutil.copyfile(filenames[filename], ".\\copies\\" + filename) @@ -293,13 +300,10 @@ def make_valid_title(s): # variable that shows whether the first title has been reached yet after_first_title = False - ################### actually parse the md file ################### + # variable that is used to be sure that we are detecting titles and not comments from codeblocks + in_code_block = False - # remove the old directories if needed - remove_directory_tree(root_dir_generic) - remove_directory_tree(root_dir_os_specific_linux) - remove_directory_tree(root_dir_os_specific_windows) - remove_directory_tree(root_dir_os_specific_macos) + ################### actually parse the md file ################### # create directories for the source markdown file create_directory(root_dir_generic) @@ -321,6 +325,8 @@ def make_valid_title(s): for line in readfile: title_level, title, directory = check_for_title(line) + detect_in_code_block(line) + # line is a title with a maximum depth of 3 if title_level > 0: last_title_level = title_level @@ -341,6 +347,9 @@ def make_valid_title(s): choose_and_write_to_file(next_action[2]) # write end of file for the last file + # print(root_dir_generic) + # print(last_directory) + # print(filename) write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) 
write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux) @@ -348,12 +357,13 @@ def make_valid_title(s): links_windows) write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos) - print("Parsing succeeded for file: " + filename) succeeded += 1 except: print("Parsing failed for file: " + filename) failed += 1 print("Success ratio: " + str(succeeded/(succeeded + failed) * 100) + "%") +print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.") # TODO: directory cleanup +# TODO: reconsider maximum depth to be detected as title From 159aa62af18dd76b5567c00a98a08c16081d9773 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 9 Aug 2024 16:43:09 +0200 Subject: [PATCH 007/152] small update, not important --- .../HPC chatbot preprocessor/start_checker.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/scripts/HPC chatbot preprocessor/start_checker.py b/scripts/HPC chatbot preprocessor/start_checker.py index 5661c79ddc9..b328e7ab80c 100644 --- a/scripts/HPC chatbot preprocessor/start_checker.py +++ b/scripts/HPC chatbot preprocessor/start_checker.py @@ -17,3 +17,19 @@ lines_until_title += 1 print(filename + " : " + str(lines_until_title)) break + +directory = "C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC\\linux-tutorial" + +for dirpath, dirnames, filenames in os.walk(directory): + for filename in filenames: + # if filename.endswith("xdmod.md"): + # break + if filename.endswith(".md"): + lines_until_title = 0 + with open(directory + "\\" + filename, "r") as file: + for line in file: + if line[0] == "#": + break + lines_until_title += 1 + print(filename + " : " + str(lines_until_title)) + break From 75765e555edb9bc67ebcaf0136ac5efc8d0461ad Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 9 Aug 2024 16:43:47 +0200 Subject: 
[PATCH 008/152] change to the templates --- scripts/HPC chatbot preprocessor/jinja_parser.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/jinja_parser.py b/scripts/HPC chatbot preprocessor/jinja_parser.py index 1b033bfdc6c..612c20dd06b 100644 --- a/scripts/HPC chatbot preprocessor/jinja_parser.py +++ b/scripts/HPC chatbot preprocessor/jinja_parser.py @@ -1,5 +1,5 @@ import yaml -from jinja2 import Template +from jinja2 import Template, FileSystemLoader, Environment, ChoiceLoader from if_mangler import mangle_ifs @@ -17,7 +17,9 @@ def jinja_parser(filename): md_content = md_file.read() # Use Jinja2 to replace the macros - template = Template(md_content) + templateloader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")]) + templateEnv = Environment(loader=templateloader) + template = templateEnv.get_template(filename) rendered_content = template.render(words_dict) # Save the rendered content to a new file From 57d9cfe5f25c66f8a6c2721fca9eaac1e6eea25d Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 9 Aug 2024 16:44:18 +0200 Subject: [PATCH 009/152] change to accommodate for more nested if-clauses --- .../HPC chatbot preprocessor/if_mangler.py | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/if_mangler.py b/scripts/HPC chatbot preprocessor/if_mangler.py index 9980a2e83e8..8dd0d099452 100644 --- a/scripts/HPC chatbot preprocessor/if_mangler.py +++ b/scripts/HPC chatbot preprocessor/if_mangler.py @@ -1,7 +1,7 @@ import re # global variable to keep track of latest if-statement scope -is_os = 0 # Can be 0, 1 or 2 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if} +is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} def mangle_os_ifs(line): @@ -26,22 +26,35 
@@ def mangle_os_ifs(line): constr_match.end() + start_index + added_length - 1:] added_length += 8 is_os = 0 - elif is_os == 1: - is_os = 2 - elif if_match: - if if_os_match: + if is_os == 3: line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ constr_match.end() + start_index + added_length - 1:] added_length += 8 is_os = 2 + elif is_os == 1: + is_os = 2 + elif if_match: + if if_os_match: + if is_os == 2: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + is_os = 3 + else: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + is_os = 2 else: if is_os == 2: is_os = 1 else: is_os = 0 else: - if is_os == 2: + if is_os == 2 or is_os == 3: line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ constr_match.end() + start_index + added_length - 1:] From 75d345b1bd41325c5a4242251c093c6b396d3e21 Mon Sep 17 00:00:00 2001 From: EwDa291 <100782488+EwDa291@users.noreply.github.com> Date: Fri, 9 Aug 2024 16:45:53 +0200 Subject: [PATCH 010/152] Delete scripts/HPC chatbot preprocessor/start_checker.py This file is just used to test some things locally and not part of the parser --- .../HPC chatbot preprocessor/start_checker.py | 35 ------------------- 1 file changed, 35 deletions(-) delete mode 100644 
scripts/HPC chatbot preprocessor/start_checker.py diff --git a/scripts/HPC chatbot preprocessor/start_checker.py b/scripts/HPC chatbot preprocessor/start_checker.py deleted file mode 100644 index b328e7ab80c..00000000000 --- a/scripts/HPC chatbot preprocessor/start_checker.py +++ /dev/null @@ -1,35 +0,0 @@ -# THIS IS NOT AN IMPORTANT FILE, DON'T WORRY ABOUT IT, I JUST USED IT TO TEST SOME THINGS - -import os - -directory = "C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC" - -for dirpath, dirnames, filenames in os.walk(directory): - for filename in filenames: - # if filename.endswith("xdmod.md"): - # break - if filename.endswith(".md"): - lines_until_title = 0 - with open(directory + "\\" + filename, "r") as file: - for line in file: - if line[0] == "#": - break - lines_until_title += 1 - print(filename + " : " + str(lines_until_title)) - break - -directory = "C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC\\linux-tutorial" - -for dirpath, dirnames, filenames in os.walk(directory): - for filename in filenames: - # if filename.endswith("xdmod.md"): - # break - if filename.endswith(".md"): - lines_until_title = 0 - with open(directory + "\\" + filename, "r") as file: - for line in file: - if line[0] == "#": - break - lines_until_title += 1 - print(filename + " : " + str(lines_until_title)) - break From ff7a9fc381399402c36670ef6ddb5bfb245b1dd4 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 12 Aug 2024 11:24:46 +0200 Subject: [PATCH 011/152] make sure files with duplicate names between normal files and linux-tutorial are both read and saved properly --- .../HPC chatbot preprocessor/if_mangler.py | 2 +- .../HPC chatbot preprocessor/jinja_parser.py | 24 +- scripts/HPC chatbot preprocessor/main.py | 246 ++++++++++-------- 3 files changed, 149 insertions(+), 123 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/if_mangler.py b/scripts/HPC chatbot preprocessor/if_mangler.py index 8dd0d099452..46b121610c9 100644 --- 
a/scripts/HPC chatbot preprocessor/if_mangler.py +++ b/scripts/HPC chatbot preprocessor/if_mangler.py @@ -66,7 +66,7 @@ def mangle_os_ifs(line): def mangle_ifs(directory, file): with open(".\\if_mangled_files\\" + file, 'w') as write_file: - with open(directory + "\\" + file, 'r') as read_file: + with open(directory, 'r') as read_file: for line in read_file: new_line = mangle_os_ifs(line) write_file.write(new_line) diff --git a/scripts/HPC chatbot preprocessor/jinja_parser.py b/scripts/HPC chatbot preprocessor/jinja_parser.py index 612c20dd06b..603a453ecf7 100644 --- a/scripts/HPC chatbot preprocessor/jinja_parser.py +++ b/scripts/HPC chatbot preprocessor/jinja_parser.py @@ -4,24 +4,28 @@ # function that let's jinja do its thing to format the files expect for the os-related if-statements -def jinja_parser(filename): +def jinja_parser(filename, copy_location): # Read the YAML file with open('..\\..\\mkdocs\\extra\\gent.yml', 'r') as yml_file: words_dict = yaml.safe_load(yml_file) - # Mangle the OS-related if-statements - mangle_ifs('.\\copies', filename) + # ugly fix for index.md error + additional_context = { + 'config': { + 'repo_url': 'https://github.com/hpcugent/vsc_user_docs' + } + } + combined_context = {**words_dict, **additional_context} - # Read the if-mangled Markdown file - with open('.\\if_mangled_files\\' + filename, 'r') as md_file: - md_content = md_file.read() + # Mangle the OS-related if-statements + mangle_ifs(copy_location, filename) # Use Jinja2 to replace the macros - templateloader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")]) - templateEnv = Environment(loader=templateloader) + template_loader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")]) + templateEnv = Environment(loader=template_loader) template = templateEnv.get_template(filename) - rendered_content = template.render(words_dict) + 
rendered_content = template.render(combined_context) # Save the rendered content to a new file - with open('.\\copies\\' + filename, 'w') as output_file: + with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file: output_file.write(rendered_content) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index b2900334ef8..8351979a865 100644 --- a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -7,34 +7,22 @@ succeeded = 0 failed = 0 -# make the necessary directories -if not os.path.exists(".\\copies"): - os.mkdir(".\\copies") - -if not os.path.exists(".\\parsed_mds"): - os.mkdir(".\\parsed_mds") - -if not os.path.exists(".\\if_mangled_files"): - os.mkdir(".\\if_mangled_files") - ################### define global variables ################### # variable that keeps track of the source directories source_directories = ["..\\..\\mkdocs\\docs\\HPC\\", "..\\..\\mkdocs\\docs\\HPC\\linux-tutorial"] -# variable that keeps track of the directories that are used to write in at different levels -root_dir_generic = ".\\parsed_mds\\generic\\" -root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" -root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" -root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" - # list of all the filenames -filenames = {} +filenames_generic = {} +filenames_linux = {} for source_directory in source_directories: all_items = os.listdir(source_directory) files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] for file in files: - filenames[file] = os.path.join(source_directory, file) + if "linux-tutorial" in source_directory: + filenames_linux[file] = os.path.join(source_directory, file) + else: + filenames_generic[file] = os.path.join(source_directory, file) # TODO: find solution for duplicate filenames between linux tutorial and normal files @@ -147,7 +135,7 
@@ def check_if_statements(curr_line): content = match.group(1) # new if-statement wrt OS - if re.match(r'if OS == ', content): + if re.search(r'if OS == ', content): OS = content[9:-1] # set new active OS @@ -159,7 +147,7 @@ def check_if_statements(curr_line): active_OS_if_states[other_OS] = "inactive" # endif statement wrt OS - elif re.match(r'endif ', content): + elif re.search(r'endif ', content): if str(1) in active_OS_if_states.values(): active_OS_if_states[ list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active" @@ -168,7 +156,7 @@ def check_if_statements(curr_line): active_OS_if_states[key] = "inactive" # else statement wrt OS - elif re.match(r'else ', content): + elif re.search(r'else ', content): i = 0 for i in range(3): @@ -264,106 +252,140 @@ def make_valid_title(s): return valid_filename -for filename in filenames.keys(): - try: - # if True: - # make a copy of one of the md files to test some things - shutil.copyfile(filenames[filename], - ".\\copies\\" + filename) - - ################### define/reset loop specific variables ################### - - # variable for the main title (needed for reference links) - main_title = filename[:-3] - - # variable that keeps track of the directories that are used to write in at different levels - curr_dirs = [filename[:-3] for i in range(4)] - - # variable to keep track whether we're dealing with OS-specific info or not - OS_specific = False - - # variable that keeps track of the latest non-zero level title and corresponding directory - last_title_level = 1 - last_title = None - last_directory = None - last_was_title = False - - # list to keep track of links in the text - links_generic = [] - links_linux = [] - links_windows = [] - links_macos = [] - - # dictionaries to keep track of current OS - active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} +# remove the directories from a previous run of the parser 
+remove_directory_tree(".\\parsed_mds") +remove_directory_tree(".\\copies") +remove_directory_tree(".\\if_mangled_files") - # variable that shows whether the first title has been reached yet - after_first_title = False - - # variable that is used to be sure that we are detecting titles and not comments from codeblocks - in_code_block = False +# make the necessary directories +if not os.path.exists(".\\copies"): + os.mkdir(".\\copies") - ################### actually parse the md file ################### +if not os.path.exists(".\\copies\\linux"): + os.mkdir(".\\copies\\linux") - # create directories for the source markdown file - create_directory(root_dir_generic) - create_directory(".\\parsed_mds\\os_specific") - create_directory(root_dir_os_specific_linux) - create_directory(root_dir_os_specific_windows) - create_directory(root_dir_os_specific_macos) - create_directory(root_dir_generic + curr_dirs[0]) - create_directory(root_dir_os_specific_linux + curr_dirs[0]) - create_directory(root_dir_os_specific_windows + curr_dirs[0]) - create_directory(root_dir_os_specific_macos + curr_dirs[0]) +if not os.path.exists(".\\parsed_mds"): + os.mkdir(".\\parsed_mds") - # process the jinja macros - jinja_parser(filename) +if not os.path.exists(".\\if_mangled_files"): + os.mkdir(".\\if_mangled_files") - # open the file and store line by line in the right file - with open(".\\copies\\" + filename, 'r') as readfile: +for filenames in [filenames_generic, filenames_linux]: + for filename in filenames.keys(): + try: + # if True: + # make a copy of one of the md files to test some things + if "linux-tutorial" in filenames[filename]: + copy_file = ".\\copies\\linux\\" + filename + else: + copy_file = ".\\copies\\" + filename + shutil.copyfile(filenames[filename], copy_file) - for line in readfile: - title_level, title, directory = check_for_title(line) + ################### define/reset loop specific variables ################### - detect_in_code_block(line) + # variable that keeps 
track of the directories that are used to write in at different levels + if "linux-tutorial" in filenames[filename]: + root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\" + root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\" + root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\" + root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\" + else: + root_dir_generic = ".\\parsed_mds\\generic\\" + root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" + root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" + root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" + + # variable for the main title (needed for reference links) + main_title = filename[:-3] + + # variable that keeps track of the directories that are used to write in at different levels + curr_dirs = [filename[:-3] for i in range(4)] + + # variable to keep track whether we're dealing with OS-specific info or not + OS_specific = False + + # variable that keeps track of the latest non-zero level title and corresponding directory + last_title_level = 1 + last_title = None + last_directory = None + last_was_title = False + + # list to keep track of links in the text + links_generic = [] + links_linux = [] + links_windows = [] + links_macos = [] + + # dictionaries to keep track of current OS + active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} + + # variable that shows whether the first title has been reached yet + after_first_title = False + + # variable that is used to be sure that we are detecting titles and not comments from codeblocks + in_code_block = False + + ################### actually parse the md file ################### + + # create directories for the source markdown file + create_directory(root_dir_generic) + create_directory(".\\parsed_mds\\os_specific") + create_directory(root_dir_os_specific_linux) + 
create_directory(root_dir_os_specific_windows) + create_directory(root_dir_os_specific_macos) + create_directory(root_dir_generic + curr_dirs[0]) + create_directory(root_dir_os_specific_linux + curr_dirs[0]) + create_directory(root_dir_os_specific_windows + curr_dirs[0]) + create_directory(root_dir_os_specific_macos + curr_dirs[0]) + + # process the jinja macros + jinja_parser(filename, copy_file) + + # open the file and store line by line in the right file + with open(copy_file, 'r') as readfile: + + for line in readfile: + title_level, title, directory = check_for_title(line) + + detect_in_code_block(line) + + # line is a title with a maximum depth of 3 + if title_level > 0: + last_title_level = title_level + last_title = title + last_directory = directory + after_first_title = True + + # line is not a title + elif after_first_title: + # check for if-statements and write the appropriate lines in the right files + next_action = check_if_statements(line) + while next_action[0] == "write_text_and_check_extra_message" or next_action[ + 0] == "check_extra_message": + if next_action[0] == "write_text_and_check_extra_message": + choose_and_write_to_file(next_action[2]) + next_action = check_if_statements(next_action[1]) + + if next_action[0] == "write_text": + choose_and_write_to_file(next_action[2]) - # line is a title with a maximum depth of 3 - if title_level > 0: - last_title_level = title_level - last_title = title - last_directory = directory - after_first_title = True + # write end of file for the last file + write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) + write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", + links_linux) + write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", + links_windows) + write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", + links_macos) + 
succeeded += 1 + except: + print("Parsing failed for file: " + filename) + failed += 1 - # line is not a title - elif after_first_title: - # check for if-statements and write the appropriate lines in the right files - next_action = check_if_statements(line) - while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": - if next_action[0] == "write_text_and_check_extra_message": - choose_and_write_to_file(next_action[2]) - next_action = check_if_statements(next_action[1]) - - if next_action[0] == "write_text": - choose_and_write_to_file(next_action[2]) - - # write end of file for the last file - # print(root_dir_generic) - # print(last_directory) - # print(filename) - write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) - write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", - links_linux) - write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", - links_windows) - write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", - links_macos) - succeeded += 1 - except: - print("Parsing failed for file: " + filename) - failed += 1 - -print("Success ratio: " + str(succeeded/(succeeded + failed) * 100) + "%") -print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.") +print("Success ratio: " + str(succeeded / (succeeded + failed) * 100) + "%") +print( + "Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.") # TODO: directory cleanup # TODO: reconsider maximum depth to be detected as title +# TODO: adapt script to be used from command line From 7d279d6a7f1992275eae487c0893befc4a48d6f9 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 12 Aug 2024 
11:54:48 +0200 Subject: [PATCH 012/152] fixed the problem of some files being written in reST instead of markdown --- scripts/HPC chatbot preprocessor/main.py | 25 +++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index 8351979a865..12f222b82f0 100644 --- a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -1,6 +1,8 @@ import os import re import shutil +import pypandoc + from jinja_parser import jinja_parser # variables for analytics @@ -24,10 +26,8 @@ else: filenames_generic[file] = os.path.join(source_directory, file) -# TODO: find solution for duplicate filenames between linux tutorial and normal files - -# TODO: problem-files (other layout than normal markdown-files) -problem_files = ["linux_tutorial\\getting_started.md", "linux_tutorial\\navigating.md"] +# some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc +problem_files = ["getting_started.md", "navigating.md"] ################### define functions ################### @@ -272,8 +272,8 @@ def make_valid_title(s): for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): - try: - # if True: + # try: + if True: # make a copy of one of the md files to test some things if "linux-tutorial" in filenames[filename]: copy_file = ".\\copies\\linux\\" + filename @@ -341,6 +341,10 @@ def make_valid_title(s): # process the jinja macros jinja_parser(filename, copy_file) + # convert the files without proper markdown layout into markdown using pandoc + if "linux-tutorial" in filenames[filename] and filename in problem_files: + pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file) + # open the file and store line by line in the right file with open(copy_file, 'r') as readfile: @@ -378,13 +382,12 @@ def make_valid_title(s): 
write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos) succeeded += 1 - except: - print("Parsing failed for file: " + filename) - failed += 1 + # except: + # print("Parsing failed for file: " + filename) + # failed += 1 print("Success ratio: " + str(succeeded / (succeeded + failed) * 100) + "%") -print( - "Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.") +print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.") # TODO: directory cleanup # TODO: reconsider maximum depth to be detected as title From 8047572387eb08e278bde89f9d688b74c817b7d0 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 12 Aug 2024 13:33:14 +0200 Subject: [PATCH 013/152] some small fixes --- scripts/HPC chatbot preprocessor/main.py | 78 +++++++++++++----------- 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index 12f222b82f0..bf16c95ea35 100644 --- a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -37,11 +37,11 @@ def remove_directory_tree(old_directory): shutil.rmtree(old_directory) -# function that checks whether the current line has a title of level 3 at maximum (returns the level of the title or 0 if the line is not a title) +# function that checks whether the current line has a title of level 4 at maximum (returns the level of the title or 0 if the line is not a title) def check_for_title_logic(curr_line): global curr_dirs match = re.match(r'^#+ ', curr_line) - if match and len(match.group(0)) <= 4: + if match and len(match.group(0)) <= 5: return len(match.group(0)) - 1 else: return 0 @@ -64,13 +64,13 @@ def check_for_title(curr_line): return 0, None, None else: if last_title is not None: - 
write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) + write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial) write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", - links_linux) + links_linux, is_linux_tutorial) write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", - links_windows) + links_windows, is_linux_tutorial) write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", - links_macos) + links_macos, is_linux_tutorial) reset_link_lists() curr_dirs[logic_output] = curr_dirs[logic_output - 1] + "\\" + make_valid_title( @@ -124,12 +124,11 @@ def replace_markdown_markers(curr_line, linklist): # function that checks for if-statements def check_if_statements(curr_line): - # TODO: adapt regex for annoying inconsistencies # check whether the first part of the line contains information wrt if-statements - match = re.search(r'^\{-if-%-\s([^%]*)%-if-}(.*)', curr_line) + match = re.search(r'^\{-if-%([^%]*)%-if-}(.*)', curr_line) # check whether the line contains information wrt if-statements that is not in its first part - match_large = re.search(r'^(.*)(\{-if-%-\s[^%]*%-if-})(.*)', curr_line) + match_large = re.search(r'^(.*)(\{-if-%[^%]*%-if-})(.*)', curr_line) if match: content = match.group(1) @@ -147,7 +146,7 @@ def check_if_statements(curr_line): active_OS_if_states[other_OS] = "inactive" # endif statement wrt OS - elif re.search(r'endif ', content): + elif re.search(r'endif', content): if str(1) in active_OS_if_states.values(): active_OS_if_states[ list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active" @@ -156,7 +155,7 @@ def check_if_statements(curr_line): active_OS_if_states[key] = "inactive" # else statement wrt OS - elif re.search(r'else ', content): + elif 
re.search(r'else', content): i = 0 for i in range(3): @@ -220,11 +219,10 @@ def choose_and_write_to_file(curr_line): def add_reference_link(file_location, reference_link): with open(file_location, 'a') as write_file: write_file.write("\nreference: " + reference_link + "\n") - # TODO: fix trailing spaces in filename # function that adds the links that should be at the end of a file -def write_end_of_file(file_location, OS, linklist): +def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_): if len(OS) > 0: OS = OS + "/" @@ -234,8 +232,13 @@ def write_end_of_file(file_location, OS, linklist): for i, link in enumerate(linklist): write_file.write("[" + str(i + 1) + "]: " + str(link) + "\n") + if is_linux_tutorial_: + linux_part = "linux-tutorial/" + else: + linux_part = "" + # finally add the reference link - add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + main_title + "/#" + last_title.lower()) + add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-')) # function that makes sure all titles can be used as valid filenames @@ -243,11 +246,14 @@ def make_valid_title(s): # Define a regex pattern for invalid characters on both Windows and Linux invalid_chars = r'[<>:"/\\|?*\0()]' + # get rid of extra information between {} brackets + s = re.sub(r'\{.*?}', '', s) + # Remove invalid characters valid_filename = re.sub(invalid_chars, '', s) # Strip leading/trailing whitespace - valid_filename = valid_filename.strip() + valid_filename = valid_filename.strip().strip('-') return valid_filename @@ -272,19 +278,21 @@ def make_valid_title(s): for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): - # try: - if True: - # make a copy of one of the md files to test some things - if "linux-tutorial" in filenames[filename]: + try: + ################### define/reset loop specific variables 
################### + + # variable that keeps track of whether file is part of the linux tutorial + is_linux_tutorial = bool("linux-tutorial" in filenames[filename]) + + # make a copy of the original file in order to make sure the original does not get altered + if is_linux_tutorial: copy_file = ".\\copies\\linux\\" + filename else: copy_file = ".\\copies\\" + filename shutil.copyfile(filenames[filename], copy_file) - ################### define/reset loop specific variables ################### - # variable that keeps track of the directories that are used to write in at different levels - if "linux-tutorial" in filenames[filename]: + if is_linux_tutorial: root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\" root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\" root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\" @@ -299,7 +307,7 @@ def make_valid_title(s): main_title = filename[:-3] # variable that keeps track of the directories that are used to write in at different levels - curr_dirs = [filename[:-3] for i in range(4)] + curr_dirs = [filename[:-3] for i in range(5)] # variable to keep track whether we're dealing with OS-specific info or not OS_specific = False @@ -353,7 +361,7 @@ def make_valid_title(s): detect_in_code_block(line) - # line is a title with a maximum depth of 3 + # line is a title with a maximum depth of 4 if title_level > 0: last_title_level = title_level last_title = title @@ -364,8 +372,7 @@ def make_valid_title(s): elif after_first_title: # check for if-statements and write the appropriate lines in the right files next_action = check_if_statements(line) - while next_action[0] == "write_text_and_check_extra_message" or next_action[ - 0] == "check_extra_message": + while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": if next_action[0] == "write_text_and_check_extra_message": choose_and_write_to_file(next_action[2]) 
next_action = check_if_statements(next_action[1]) @@ -374,21 +381,22 @@ def make_valid_title(s): choose_and_write_to_file(next_action[2]) # write end of file for the last file - write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) + write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial) write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", - links_linux) + links_linux, is_linux_tutorial) write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", - links_windows) + links_windows, is_linux_tutorial) write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", - links_macos) + links_macos, is_linux_tutorial) succeeded += 1 - # except: - # print("Parsing failed for file: " + filename) - # failed += 1 + except: + print("Parsing failed for file: " + filename) + failed += 1 print("Success ratio: " + str(succeeded / (succeeded + failed) * 100) + "%") print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.") -# TODO: directory cleanup -# TODO: reconsider maximum depth to be detected as title +remove_directory_tree(".\\copies") +remove_directory_tree(".\\if_mangled_files") +# TODO: reconsider maximum depth to be detected as title (now at four) # TODO: adapt script to be used from command line From 7d1c5ed2cfca12d5eb4ecaffa3178e821c63f210 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 10:35:06 +0200 Subject: [PATCH 014/152] remove try-except-structure --- scripts/HPC chatbot preprocessor/main.py | 216 +++++++++++------------ 1 file changed, 105 insertions(+), 111 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index bf16c95ea35..2ed91022b7b 100644 --- 
a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -278,122 +278,116 @@ def make_valid_title(s): for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): - try: - ################### define/reset loop specific variables ################### + ################### define/reset loop specific variables ################### - # variable that keeps track of whether file is part of the linux tutorial - is_linux_tutorial = bool("linux-tutorial" in filenames[filename]) + # variable that keeps track of whether file is part of the linux tutorial + is_linux_tutorial = bool("linux-tutorial" in filenames[filename]) - # make a copy of the original file in order to make sure the original does not get altered - if is_linux_tutorial: - copy_file = ".\\copies\\linux\\" + filename - else: - copy_file = ".\\copies\\" + filename - shutil.copyfile(filenames[filename], copy_file) - - # variable that keeps track of the directories that are used to write in at different levels - if is_linux_tutorial: - root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\" - root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\" - root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\" - root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\" - else: - root_dir_generic = ".\\parsed_mds\\generic\\" - root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" - root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" - root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" - - # variable for the main title (needed for reference links) - main_title = filename[:-3] - - # variable that keeps track of the directories that are used to write in at different levels - curr_dirs = [filename[:-3] for i in range(5)] - - # variable to keep track whether we're dealing with OS-specific info or not - OS_specific = False - - # 
variable that keeps track of the latest non-zero level title and corresponding directory - last_title_level = 1 - last_title = None - last_directory = None - last_was_title = False - - # list to keep track of links in the text - links_generic = [] - links_linux = [] - links_windows = [] - links_macos = [] - - # dictionaries to keep track of current OS - active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} - - # variable that shows whether the first title has been reached yet - after_first_title = False - - # variable that is used to be sure that we are detecting titles and not comments from codeblocks - in_code_block = False - - ################### actually parse the md file ################### - - # create directories for the source markdown file - create_directory(root_dir_generic) - create_directory(".\\parsed_mds\\os_specific") - create_directory(root_dir_os_specific_linux) - create_directory(root_dir_os_specific_windows) - create_directory(root_dir_os_specific_macos) - create_directory(root_dir_generic + curr_dirs[0]) - create_directory(root_dir_os_specific_linux + curr_dirs[0]) - create_directory(root_dir_os_specific_windows + curr_dirs[0]) - create_directory(root_dir_os_specific_macos + curr_dirs[0]) - - # process the jinja macros - jinja_parser(filename, copy_file) - - # convert the files without proper markdown layout into markdown using pandoc - if "linux-tutorial" in filenames[filename] and filename in problem_files: - pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file) - - # open the file and store line by line in the right file - with open(copy_file, 'r') as readfile: - - for line in readfile: - title_level, title, directory = check_for_title(line) - - detect_in_code_block(line) - - # line is a title with a maximum depth of 4 - if title_level > 0: - last_title_level = title_level - last_title = title - last_directory = directory - after_first_title = True - - # line is not a title - elif 
after_first_title: - # check for if-statements and write the appropriate lines in the right files - next_action = check_if_statements(line) - while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": - if next_action[0] == "write_text_and_check_extra_message": - choose_and_write_to_file(next_action[2]) - next_action = check_if_statements(next_action[1]) - - if next_action[0] == "write_text": + # make a copy of the original file in order to make sure the original does not get altered + if is_linux_tutorial: + copy_file = ".\\copies\\linux\\" + filename + else: + copy_file = ".\\copies\\" + filename + shutil.copyfile(filenames[filename], copy_file) + + # variable that keeps track of the directories that are used to write in at different levels + if is_linux_tutorial: + root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\" + root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\" + root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\" + root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\" + else: + root_dir_generic = ".\\parsed_mds\\generic\\" + root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" + root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" + root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" + + # variable for the main title (needed for reference links) + main_title = filename[:-3] + + # variable that keeps track of the directories that are used to write in at different levels + curr_dirs = [filename[:-3] for i in range(5)] + + # variable to keep track whether we're dealing with OS-specific info or not + OS_specific = False + + # variable that keeps track of the latest non-zero level title and corresponding directory + last_title_level = 1 + last_title = None + last_directory = None + last_was_title = False + + # list to keep track of links in the text + 
links_generic = [] + links_linux = [] + links_windows = [] + links_macos = [] + + # dictionaries to keep track of current OS + active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} + + # variable that shows whether the first title has been reached yet + after_first_title = False + + # variable that is used to be sure that we are detecting titles and not comments from codeblocks + in_code_block = False + + ################### actually parse the md file ################### + + # create directories for the source markdown file + create_directory(root_dir_generic) + create_directory(".\\parsed_mds\\os_specific") + create_directory(root_dir_os_specific_linux) + create_directory(root_dir_os_specific_windows) + create_directory(root_dir_os_specific_macos) + create_directory(root_dir_generic + curr_dirs[0]) + create_directory(root_dir_os_specific_linux + curr_dirs[0]) + create_directory(root_dir_os_specific_windows + curr_dirs[0]) + create_directory(root_dir_os_specific_macos + curr_dirs[0]) + + # process the jinja macros + jinja_parser(filename, copy_file) + + # convert the files without proper markdown layout into markdown using pandoc + if "linux-tutorial" in filenames[filename] and filename in problem_files: + pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file) + + # open the file and store line by line in the right file + with open(copy_file, 'r') as readfile: + + for line in readfile: + title_level, title, directory = check_for_title(line) + + detect_in_code_block(line) + + # line is a title with a maximum depth of 4 + if title_level > 0: + last_title_level = title_level + last_title = title + last_directory = directory + after_first_title = True + + # line is not a title + elif after_first_title: + # check for if-statements and write the appropriate lines in the right files + next_action = check_if_statements(line) + while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == 
"check_extra_message": + if next_action[0] == "write_text_and_check_extra_message": choose_and_write_to_file(next_action[2]) + next_action = check_if_statements(next_action[1]) - # write end of file for the last file - write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", - links_linux, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", - links_windows, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", - links_macos, is_linux_tutorial) - succeeded += 1 - except: - print("Parsing failed for file: " + filename) - failed += 1 + if next_action[0] == "write_text": + choose_and_write_to_file(next_action[2]) + + # write end of file for the last file + write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial) + write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", + links_linux, is_linux_tutorial) + write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", + links_windows, is_linux_tutorial) + write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", + links_macos, is_linux_tutorial) -print("Success ratio: " + str(succeeded / (succeeded + failed) * 100) + "%") print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.") remove_directory_tree(".\\copies") From 984b0cd3868b38c59e72c56fd75f04c6e4918b18 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 12:23:04 +0200 Subject: [PATCH 015/152] collapse all code into one file --- scripts/HPC chatbot 
preprocessor/main.py | 105 ++++++++++++++++++++++- 1 file changed, 101 insertions(+), 4 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index 2ed91022b7b..b6e42e99ea0 100644 --- a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -2,8 +2,8 @@ import re import shutil import pypandoc - -from jinja_parser import jinja_parser +import yaml +from jinja2 import FileSystemLoader, Environment, ChoiceLoader # variables for analytics succeeded = 0 @@ -29,6 +29,9 @@ # some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc problem_files = ["getting_started.md", "navigating.md"] +# global variable to keep track of latest if-statement scope +is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} + ################### define functions ################### # function that removes the previous file structure before starting the process of making a new one @@ -122,6 +125,102 @@ def replace_markdown_markers(curr_line, linklist): return curr_line, linklist +# function that let's jinja do its thing to format the files expect for the os-related if-statements +def jinja_parser(filename, copy_location): + # Read the YAML file + with open('..\\..\\mkdocs\\extra\\gent.yml', 'r') as yml_file: + words_dict = yaml.safe_load(yml_file) + + # ugly fix for index.md error + additional_context = { + 'config': { + 'repo_url': 'https://github.com/hpcugent/vsc_user_docs' + } + } + combined_context = {**words_dict, **additional_context} + + # Mangle the OS-related if-statements + mangle_ifs(copy_location, filename) + + # Use Jinja2 to replace the macros + template_loader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")]) + templateEnv = Environment(loader=template_loader) + 
template = templateEnv.get_template(filename) + rendered_content = template.render(combined_context) + + # Save the rendered content to a new file + with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file: + output_file.write(rendered_content) + + +def mangle_os_ifs(line): + global is_os + + match = re.search(r'\{%(.*?)%}(.*)', line) + + start_index = 0 + added_length = 0 + + while match: + + constr_match = re.search(r'\{%.*?%}', match.string) + if_match = re.search(r'if ', match.group(1)) + if_os_match = re.search(r'if OS == ', match.group(1)) + endif_match = re.search(r'endif', match.group(1)) + + if endif_match: + if is_os == 2: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + is_os = 0 + if is_os == 3: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + is_os = 2 + elif is_os == 1: + is_os = 2 + elif if_match: + if if_os_match: + if is_os == 2: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + is_os = 3 + else: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + is_os = 2 + else: + if is_os == 
2: + is_os = 1 + else: + is_os = 0 + else: + if is_os == 2 or is_os == 3: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + start_index += constr_match.end() + match = re.search(r'\{%(.*?)%}(.*)', match.group(2)) + return line + + +def mangle_ifs(directory, file): + with open(".\\if_mangled_files\\" + file, 'w') as write_file: + with open(directory, 'r') as read_file: + for line in read_file: + new_line = mangle_os_ifs(line) + write_file.write(new_line) + + # function that checks for if-statements def check_if_statements(curr_line): # check whether the first part of the line contains information wrt if-statements @@ -388,8 +487,6 @@ def make_valid_title(s): write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos, is_linux_tutorial) -print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.") - remove_directory_tree(".\\copies") remove_directory_tree(".\\if_mangled_files") # TODO: reconsider maximum depth to be detected as title (now at four) From 8f5eeaa5454860326bf3a02d15a63c5622ab7aee Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 12:26:02 +0200 Subject: [PATCH 016/152] Rename file --- scripts/HPC chatbot preprocessor/{main.py => chatbot_parser.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/HPC chatbot preprocessor/{main.py => chatbot_parser.py} (100%) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/chatbot_parser.py similarity index 100% rename from scripts/HPC chatbot preprocessor/main.py rename to scripts/HPC chatbot preprocessor/chatbot_parser.py From 2b97b7a31d9ba151f1747a152736dac4906af466 
Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 12:30:36 +0200 Subject: [PATCH 017/152] cleanup repository --- .../HPC chatbot preprocessor/.idea/.gitignore | 8 - .../.idea/HPC chatbot preprocessor.iml | 10 - .../inspectionProfiles/Project_Default.xml | 25 -- .../inspectionProfiles/profiles_settings.xml | 6 - .../HPC chatbot preprocessor/.idea/misc.xml | 7 - .../.idea/modules.xml | 8 - .../HPC chatbot preprocessor/.idea/vcs.xml | 6 - .../copies/getting_started_copy.md | 268 ------------------ .../Getting-Access/Getting-Access.txt | 25 -- .../Getting-Connected/Getting-Connected.txt | 19 -- .../Getting-Started/Getting-Started.txt | 11 - .../Inspect-your-results.txt | 56 ---- .../Getting-Started/Next-steps/Next-steps.txt | 15 - .../Submitting-a-job/Submitting-a-job.txt | 60 ---- .../Transfer-your-files.txt | 21 -- .../Wait-for-job-to-be-executed.txt | 26 -- .../Getting-Access/Getting-Access.txt | 2 - .../Getting-Connected/Getting-Connected.txt | 18 -- .../Getting-Started/Getting-Started.txt | 2 - .../Inspect-your-results.txt | 2 - .../Getting-Started/Next-steps/Next-steps.txt | 2 - .../Submitting-a-job/Submitting-a-job.txt | 2 - .../Transfer-your-files.txt | 21 -- .../Wait-for-job-to-be-executed.txt | 2 - .../Getting-Access/Getting-Access.txt | 2 - .../Getting-Connected/Getting-Connected.txt | 13 - .../Getting-Started/Getting-Started.txt | 2 - .../Inspect-your-results.txt | 2 - .../Getting-Started/Next-steps/Next-steps.txt | 2 - .../Submitting-a-job/Submitting-a-job.txt | 2 - .../Transfer-your-files.txt | 21 -- .../Wait-for-job-to-be-executed.txt | 2 - .../Getting-Access/Getting-Access.txt | 2 - .../Getting-Connected/Getting-Connected.txt | 13 - .../Getting-Started/Getting-Started.txt | 2 - .../Inspect-your-results.txt | 2 - .../Getting-Started/Next-steps/Next-steps.txt | 2 - .../Submitting-a-job/Submitting-a-job.txt | 2 - .../Transfer-your-files.txt | 15 - .../Wait-for-job-to-be-executed.txt | 2 - .../HPC chatbot preprocessor/if_mangler.py | 72 ----- 
.../HPC chatbot preprocessor/jinja_parser.py | 31 -- 42 files changed, 811 deletions(-) delete mode 100644 scripts/HPC chatbot preprocessor/.idea/.gitignore delete mode 100644 scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml delete mode 100644 scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml delete mode 100644 scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml delete mode 100644 scripts/HPC chatbot preprocessor/.idea/misc.xml delete mode 100644 scripts/HPC chatbot preprocessor/.idea/modules.xml delete mode 100644 scripts/HPC chatbot preprocessor/.idea/vcs.xml delete mode 100644 scripts/HPC chatbot preprocessor/copies/getting_started_copy.md delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt delete mode 100644 scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt delete mode 100644 scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt delete mode 100644 scripts/HPC chatbot 
preprocessor/if_mangler.py delete mode 100644 scripts/HPC chatbot preprocessor/jinja_parser.py diff --git a/scripts/HPC chatbot preprocessor/.idea/.gitignore b/scripts/HPC chatbot preprocessor/.idea/.gitignore deleted file mode 100644 index 13566b81b01..00000000000 --- a/scripts/HPC chatbot preprocessor/.idea/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml -# Editor-based HTTP Client requests -/httpRequests/ -# Datasource local storage ignored files -/dataSources/ -/dataSources.local.xml diff --git a/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml b/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml deleted file mode 100644 index 2c80e126949..00000000000 --- a/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index fc946d9cefc..00000000000 --- a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - - \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 105ce2da2d6..00000000000 --- a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/misc.xml b/scripts/HPC chatbot preprocessor/.idea/misc.xml deleted file mode 100644 index 54cda8fd6dd..00000000000 --- a/scripts/HPC chatbot preprocessor/.idea/misc.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/scripts/HPC chatbot 
preprocessor/.idea/modules.xml b/scripts/HPC chatbot preprocessor/.idea/modules.xml deleted file mode 100644 index 58e027d745f..00000000000 --- a/scripts/HPC chatbot preprocessor/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/vcs.xml b/scripts/HPC chatbot preprocessor/.idea/vcs.xml deleted file mode 100644 index b2bdec2d71b..00000000000 --- a/scripts/HPC chatbot preprocessor/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md b/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md deleted file mode 100644 index 8fe33ebc513..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md +++ /dev/null @@ -1,268 +0,0 @@ -{% set exampleloc="mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist" %} -# Getting Started - -Welcome to the "Getting Started" guide. This chapter will lead you through the initial steps of logging into the {{hpcinfra}} and submitting your very first job. We'll also walk you through the process step by step using a practical example. - -In addition to this chapter, you might find the [recording of the *Introduction to HPC-UGent* training session](https://www.ugent.be/hpc/en/training/introhpcugent-recording) to be a useful resource. - -Before proceeding, read [the introduction to HPC](introduction.md) to gain an understanding of the {{ hpcinfra }} and related terminology. - -### Getting Access - -To get access to the {{hpcinfra}}, visit [Getting an HPC Account](account.md). - -If you have not used Linux before, -{%- if site == 'Gent' %} -now would be a good time to follow our [Linux Tutorial](linux-tutorial/index.md). -{%- else %} -please learn some basics first before continuing. (see [Appendix C - Useful Linux Commands](useful_linux_commands.md)) -{%- endif %} - -#### A typical workflow looks like this: - -1. 
Connect to the login nodes -2. Transfer your files to the {{hpcinfra}} -3. Optional: compile your code and test it -4. Create a job script and submit your job -5. Wait for job to be executed -6. Study the results generated by your jobs, either on the cluster or - after downloading them locally. - -We will walk through an illustrative workload to get you started. In this example, our objective is to train a deep learning model for recognizing hand-written digits (MNIST dataset) using [TensorFlow](https://www.tensorflow.org/); -see the [example scripts](https://github.com/hpcugent/vsc_user_docs/tree/main/{{exampleloc}}). - -### Getting Connected - -There are two options to connect - -- Using a terminal to connect via SSH (for power users) (see [First Time connection to the {{ hpcinfra}}](connecting.md#first-time-connection-to-the-hpc-infrastructure)) -- [Using the web portal](web_portal.md) - -Considering your operating system is **{{OS}}**, - -{%- if OS == linux %} -it is recommended to make use of the `ssh` command in a terminal to get the most flexibility. - -Assuming you have already generated SSH keys in the previous step ([Getting Access](#getting-access)), and that they are in a default location, you should now be able to login by running the following command: - -
ssh {{userid}}@{{loginnode}}
- -!!! Warning "Use your own VSC account id" - - Replace {{userid}} with your VSC account id (see <https://account.vscentrum.be/>) - -!!! Tip - - You can also still use the web portal (see [shell access on web portal](web_portal.md#shell-access)) - -{%- else %} -{%- if OS == windows %} it is recommended to use the web portal. -{%- else %} it should be easy to make use of the `ssh` command in a terminal, but the web portal will work too. {%- endif %} - -The [web portal](web_portal.md) offers a convenient way to upload files and gain shell access to the {{hpcinfra}} from a standard web browser (no software installation or configuration required). - -See [shell access](web_portal.md#shell-access) when using the web portal, or -[connection to the {{hpcinfra}}](connecting.md#first-time-connection-to-the-hpc-infrastructure) when using a terminal. - -Make sure you can get shell access to the {{hpcinfra}} before proceeding with the next steps. - -{%- endif %} - -!!! Info - - When having problems see the [connection issues section on the troubleshooting page](troubleshooting.md#sec:connecting-issues). - - -### Transfer your files - -Now that you can log in, it is time to transfer files from your local computer to your **home directory** on the {{hpcinfra}}. - -Download [tensorflow_mnist.py](https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/tensorflow_mnist.py) -and [run.sh](https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/run.sh) example scripts to your computer (from [here](https://github.com/hpcugent/vsc_user_docs/tree/main/{{exampleloc}})). - -{%- if OS == windows %} - -The [HPC-UGent web portal](https://login.hpc.ugent.be) provides a file browser that allows uploading files. -For more information see the [file browser section](web_portal.md#file-browser). - -Upload both files (`run.sh` and `tensorflow_mnist.py`) to your **home directory** and go back to your shell. - -!!! 
Info - - As an alternative, you can use WinSCP (see [our section](connecting.md#winscp)) - -{%- else %} - -On your local machine you can run: -
curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/tensorflow_mnist.py
-curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/run.sh
-
- -Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC). -
scp tensorflow_mnist.py run.sh {{userid}}@{{ loginnode }}:~ 
-
ssh  {{userid}}@{{ loginnode }} 
- -!!! Warning "Use your own VSC account id" - - Replace {{userid}} with your VSC account id (see <https://account.vscentrum.be/>) - -!!! Info - - For more information about transferring files or `scp`, see [transfer files to/from the HPC](connecting.md#transfer-files-tofrom-the-hpc). - -{%- endif %} - -When running `ls` in your session on the {{hpcinfra}}, you should see the two files listed in your home directory (`~`): - -```shell -$ ls ~ -run.sh tensorflow_mnist.py -``` - -When you do not see these files, make sure you uploaded the files to your **home directory**. - -### Submitting a job - -Jobs are submitted and executed using job scripts. In our case **run.sh** can be used as a (very minimal) job script. - -A job script is a shell script, a text file that specifies the resources, -the software that is used (via `module load` statements), -and the steps that should be executed to run the calculation. - -Our job script looks like this: - -
-- run.sh --
- -```bash -#!/bin/bash - -module load TensorFlow/2.11.0-foss-2022a - -python tensorflow_mnist.py - -``` -As you can see this job script will run the Python script named **tensorflow_mnist.py**. - - -The jobs you submit are by default executed on **cluster/{{defaultcluster}}**, you can swap to another cluster by issuing the following command. - -```shell -module swap cluster/{{othercluster}} -``` - -!!! Tip - - When submitting jobs with limited amount of resources, it is recommended to use the [debug/interactive cluster](interactive_debug.md#interactive-and-debug-cluster): `donphan`. - -{%- if site == 'Gent' %} - - To get a list of all clusters and their hardware, see <https://www.ugent.be/hpc/en/infrastructure>. - -{%- endif %} - -This job script can now be submitted to the cluster's job system for execution, using the qsub (**q**ueue **sub**mit) command: - -```shell -$ qsub run.sh -{{jobid}} -``` - -This command returns a job identifier (*{{jobid}}*) on the HPC cluster. This is a unique identifier for the job which can be used to monitor and manage your job. - -!!! Warning "Make sure you understand what the `module` command does" - - Note that the module commands only modify environment variables. For instance, running `module swap cluster/{{othercluster}}` will update your shell environment so that `qsub` submits a job to the `{{othercluster}}` cluster, - but our active shell session is still running on the login node. - - It is important to understand that while `module` commands affect your session environment, they do ***not*** change where the commands you are running are being executed: they will still be run on the login node you are on. - - When you submit a job script however, the commands ***in*** the job script will be run on a workernode of the cluster the job was submitted to (like `{{othercluster}}`). - -For detailed information about `module` commands, read the [running batch jobs](running_batch_jobs.md) chapter. 
- -### Wait for job to be executed - -Your job is put into a queue before being executed, so it may take a while before it actually starts. -(see [when will my job start?](running_batch_jobs.md#when-will-my-job-start) for scheduling policy). - -You can get an overview of the active jobs using the `qstat` command: -
$ qstat
-Job ID     Name             User            Time Use S Queue
----------- ---------------- --------------- -------- - -------
-{{jobid}}     run.sh           {{userid}}        0:00:00  Q {{othercluster}}
-
- -Eventually, after entering `qstat` again you should see that your job has started running: -
$ qstat
-Job ID     Name             User            Time Use S Queue
----------- ---------------- --------------- -------- - -------
-{{jobid}}     run.sh           {{userid}}        0:00:01  R {{othercluster}}
-
- -If you don't see your job in the output of the `qstat` command anymore, your job has likely completed. - -Read [this section](running_batch_jobs.md#monitoring-and-managing-your-jobs) on how to interpret the output. - -### Inspect your results - -When your job finishes it generates 2 output files: - -- One for normal output messages (*stdout* output channel). -- One for warning and error messages (*stderr* output channel). - -By default located in the directory where you issued `qsub`. - -{%- if site == 'Gent' %} - -!!! Info - - For more information about the stdout and stderr output channels, see this [section](linux-tutorial/beyond_the_basics.md#inputoutput). - -{%- endif %} - -In our example when running ls in the current directory you should see 2 new files: - -- **run.sh.o{{jobid}}**, containing *normal output messages* produced by job {{jobid}}; -- **run.sh.e{{jobid}}**, containing *errors and warnings* produced by job {{jobid}}. - -!!! Info - - run.sh.e{{jobid}} should be empty (no errors or warnings). - -!!! Warning "Use your own job ID" - - Replace {{jobid}} with the jobid you got from the `qstat` command (see above) or simply look for added files in your current directory by running `ls`. 
- -When examining the contents of ``run.sh.o{{jobid}}`` you will see something like this: -``` -Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz -11493376/11490434 [==============================] - 1s 0us/step -Epoch 1/5 -1875/1875 [==============================] - 2s 823us/step - loss: 0.2960 - accuracy: 0.9133 -Epoch 2/5 -1875/1875 [==============================] - 1s 771us/step - loss: 0.1427 - accuracy: 0.9571 -Epoch 3/5 -1875/1875 [==============================] - 1s 767us/step - loss: 0.1070 - accuracy: 0.9675 -Epoch 4/5 -1875/1875 [==============================] - 1s 764us/step - loss: 0.0881 - accuracy: 0.9727 -Epoch 5/5 -1875/1875 [==============================] - 1s 764us/step - loss: 0.0741 - accuracy: 0.9768 -313/313 - 0s - loss: 0.0782 - accuracy: 0.9764 -``` - -Hurray 🎉, we trained a deep learning model and achieved 97.64 percent accuracy. - -!!! Warning - - When using TensorFlow specifically, you should actually submit jobs to a GPU cluster for better performance, see [GPU clusters](gpu.md). - - For the purpose of this example, we are running a very small TensorFlow workload on a CPU-only cluster. 
- -### Next steps - -- [Running interactive jobs](running_interactive_jobs.md) -- [Running jobs with input/output data](running_jobs_with_input_output_data.md) -- [Multi core jobs/Parallel Computing](multi_core_jobs.md) -- [Interactive and debug cluster](interactive_debug.md#interactive-and-debug-cluster) - -For more examples see [Program examples](program_examples.md) and [Job script examples](jobscript_examples.md) diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt deleted file mode 100644 index f95191b96f0..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt +++ /dev/null @@ -1,25 +0,0 @@ - -To get access to the HPC-UGent infrastructure, visit Getting an HPC Account[1]. - -If you have not used Linux before, -now would be a good time to follow our Linux Tutorial[2]. - -#### A typical workflow looks like this: - -1. Connect to the login nodes -2. Transfer your files to the HPC-UGent infrastructure -3. Optional: compile your code and test it -4. Create a job script and submit your job -5. Wait for job to be executed -6. Study the results generated by your jobs, either on the cluster or - after downloading them locally. - -We will walk through an illustrative workload to get you started. In this example, our objective is to train a deep learning model for recognizing hand-written digits (MNIST dataset) using TensorFlow[3]; -see the example scripts[4]. 
- -[1]: account.md -[2]: linux-tutorial/index.md -[3]: https://www.tensorflow.org/ -[4]: https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist - -reference: docs.hpc.ugent.be/getting_started_copy/#getting-access diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt deleted file mode 100644 index 94f17ac5070..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt +++ /dev/null @@ -1,19 +0,0 @@ - -There are two options to connect - -- Using a terminal to connect via SSH (for power users) (see First Time connection to the HPC-UGent infrastructure[1]) -- Using the web portal[2] - -Considering your operating system is **{{OS}}**, - - -!!! Info - - When having problems see the connection issues section on the troubleshooting page[3]. - - -[1]: connecting.md#first-time-connection-to-the-hpc-infrastructure -[2]: web_portal.md -[3]: troubleshooting.md#sec:connecting-issues - -reference: docs.hpc.ugent.be/getting_started_copy/#getting-connected diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt deleted file mode 100644 index 3403b57f2c2..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt +++ /dev/null @@ -1,11 +0,0 @@ - -Welcome to the "Getting Started" guide. This chapter will lead you through the initial steps of logging into the HPC-UGent infrastructure and submitting your very first job. 
We'll also walk you through the process step by step using a practical example. - -In addition to this chapter, you might find the recording of the *Introduction to HPC-UGent* training session[1] to be a useful resource. - -Before proceeding, read the introduction to HPC[2] to gain an understanding of the HPC-UGent infrastructure and related terminology. - -[1]: https://www.ugent.be/hpc/en/training/introhpcugent-recording -[2]: introduction.md - -reference: docs.hpc.ugent.be/getting_started_copy/#getting-started diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt deleted file mode 100644 index 417416007f5..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt +++ /dev/null @@ -1,56 +0,0 @@ - -When your job finishes it generates 2 output files: - -- One for normal output messages (*stdout* output channel). -- One for warning and error messages (*stderr* output channel). - -By default located in the directory where you issued `qsub`. - - -!!! Info - - For more information about the stdout and stderr output channels, see this section[1]. - - -In our example when running ls in the current directory you should see 2 new files: - -- **run.sh.o{{jobid}}**, containing *normal output messages* produced by job {{jobid}}; -- **run.sh.e{{jobid}}**, containing *errors and warnings* produced by job {{jobid}}. - -!!! Info - - run.sh.e{{jobid}} should be empty (no errors or warnings). - -!!! Warning "Use your own job ID" - - Replace {{jobid}} with the jobid you got from the `qstat` command (see above) or simply look for added files in your current directory by running `ls`. 
- -When examining the contents of ``run.sh.o{{jobid}}`` you will see something like this: -``` -Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz -11493376/11490434 [==============================] - 1s 0us/step -Epoch 1/5 -1875/1875 [==============================] - 2s 823us/step - loss: 0.2960 - accuracy: 0.9133 -Epoch 2/5 -1875/1875 [==============================] - 1s 771us/step - loss: 0.1427 - accuracy: 0.9571 -Epoch 3/5 -1875/1875 [==============================] - 1s 767us/step - loss: 0.1070 - accuracy: 0.9675 -Epoch 4/5 -1875/1875 [==============================] - 1s 764us/step - loss: 0.0881 - accuracy: 0.9727 -Epoch 5/5 -1875/1875 [==============================] - 1s 764us/step - loss: 0.0741 - accuracy: 0.9768 -313/313 - 0s - loss: 0.0782 - accuracy: 0.9764 -``` - -Hurray šŸŽ‰, we trained a deep learning model and achieved 97,64 percent accuracy. - -!!! Warning - - When using TensorFlow specifically, you should actually submit jobs to a GPU cluster for better performance, see GPU clusters[2]. - - For the purpose of this example, we are running a very small TensorFlow workload on a CPU-only cluster. 
- -[1]: linux-tutorial/beyond_the_basics.md#inputoutput -[2]: gpu.md - -reference: docs.hpc.ugent.be/getting_started_copy/#inspect-your-results diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt deleted file mode 100644 index 804b56b8251..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt +++ /dev/null @@ -1,15 +0,0 @@ - -- Running interactive jobs[1] -- Running jobs with input/output data[2] -- Multi core jobs/Parallel Computing[3] -- Interactive and debug cluster[4] - -For more examples see Program examples[5] and Job script examples[6] -[1]: running_interactive_jobs.md -[2]: running_jobs_with_input_output_data.md -[3]: multi_core_jobs.md -[4]: interactive_debug.md#interactive-and-debug-cluster -[5]: program_examples.md -[6]: jobscript_examples.md - -reference: docs.hpc.ugent.be/getting_started_copy/#next-steps diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt deleted file mode 100644 index edb336fa06b..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt +++ /dev/null @@ -1,60 +0,0 @@ - -Jobs are submitted and executed using job scripts. In our case **run.sh** can be used as a (very minimal) job script. - -A job script is a shell script, a text file that specifies the resources, -the software that is used (via `module load` statements), -and the steps that should be executed to run the calculation. - -Our job script looks like this: - -
-- run.sh --
- -```bash -#!/bin/bash - -module load TensorFlow/2.11.0-foss-2022a - -python tensorflow_mnist.py - -``` -As you can see this job script will run the Python script named **tensorflow_mnist.py**. - - -The jobs you submit are per default executed on **cluser/{{defaultcluster}}**, you can swap to another cluster by issuing the following command. - -```shell -module swap cluster/{{othercluster}} -``` - -!!! Tip - - When submitting jobs with limited amount of resources, it is recommended to use the debug/interactive cluster[1]: `donphan`. - - - To get a list of all clusters and their hardware, see . - - -This job script can now be submitted to the cluster's job system for execution, using the qsub (**q**ueue **sub**mit) command: - -```shell -$ qsub run.sh -{{jobid}} -``` - -This command returns a job identifier (*{{jobid}}*) on the HPC cluster. This is a unique identifier for the job which can be used to monitor and manage your job. - -!!! Warning "Make sure you understand what the `module` command does" - - Note that the module commands only modify environment variables. For instance, running `module swap cluster/{{othercluster}}` will update your shell environment so that `qsub` submits a job to the `{{othercluster}}` cluster, - but our active shell session is still running on the login node. - - It is important to understand that while `module` commands affect your session environment, they do ***not*** change where the commands your are running are being executed: they will still be run on the login node you are on. - - When you submit a job script however, the commands ***in*** the job script will be run on a workernode of the cluster the job was submitted to (like `{{othercluster}}`). - -For detailed information about `module` commands, read the running batch jobs[2] chapter. 
- -[1]: interactive_debug.md#interactive-and-debug-cluster -[2]: running_batch_jobs.md - -reference: docs.hpc.ugent.be/getting_started_copy/#submitting-a-job diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt deleted file mode 100644 index 94dc30f6712..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt +++ /dev/null @@ -1,21 +0,0 @@ - -Now that you can login, it is time to transfer files from your local computer to your **home directory** on the HPC-UGent infrastructure. - -Download tensorflow_mnist.py[1] -and run.sh[2] example scripts to your computer (from here[3]). - - -When running `ls` in your session on the HPC-UGent infrastructure, you should see the two files listed in your home directory (`~`): - -```shell -$ ls ~ -run.sh tensorflow_mnist.py -``` - -When you do not see these files, make sure you uploaded the files to your **home directory**. 
- -[1]: https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py -[2]: https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh -[3]: https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist - -reference: docs.hpc.ugent.be/getting_started_copy/#transfer-your-files diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt deleted file mode 100644 index de177946cf9..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt +++ /dev/null @@ -1,26 +0,0 @@ - -Your job is put into a queue before being executed, so it may take a while before it actually starts. -(see when will my job start?[1] for scheduling policy). - -You can get an overview of the active jobs using the `qstat` command: -
$ qstat
-Job ID     Name             User            Time Use S Queue
----------- ---------------- --------------- -------- - -------
-{{jobid}}     run.sh           {{userid}}        0:00:00  Q {{othercluster}}
-
- -Eventually, after entering `qstat` again you should see that your job has started running: -
$ qstat
-Job ID     Name             User            Time Use S Queue
----------- ---------------- --------------- -------- - -------
-{{jobid}}     run.sh           {{userid}}        0:00:01  R {{othercluster}}
-
- -If you don't see your job in the output of the `qstat` command anymore, your job has likely completed. - -Read this section[2] on how to interpret the output. - -[1]: running_batch_jobs.md#when-will-my-job-start -[2]: running_batch_jobs.md#monitoring-and-managing-your-jobs - -reference: docs.hpc.ugent.be/getting_started_copy/#wait-for-job-to-be-executed diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt deleted file mode 100644 index e756b9a3cbe..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-access diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt deleted file mode 100644 index bac5dfcbfbe..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt +++ /dev/null @@ -1,18 +0,0 @@ -it is recommended to make use of the `ssh` command in a terminal to get the most flexibility. - -Assuming you have already generated SSH keys in the previous step (Getting Access[1]), and that they are in a default location, you should now be able to login by running the following command: - -
ssh {{userid}}@{{loginnode}}
- -!!! Warning "User your own VSC account id" - - Replace {{userid}} with your VSC account id (see ) - -!!! Tip - - You can also still use the web portal (see shell access on web portal[2]) - -[1]: #getting-access -[2]: web_portal.md#shell-access - -reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-connected diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt deleted file mode 100644 index f0b9d83bed3..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-started diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt deleted file mode 100644 index 441b54c7042..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Linux/getting_started_copy/#inspect-your-results diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt deleted file mode 100644 index d72ffccf01a..00000000000 --- a/scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Linux/getting_started_copy/#next-steps diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt deleted file mode 100644 index 744c2c3db7a..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Linux/getting_started_copy/#submitting-a-job diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt deleted file mode 100644 index aca6e05d28c..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt +++ /dev/null @@ -1,21 +0,0 @@ - -On your local machine you can run: -
curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py
-curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh
-
- -Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC). -
scp tensorflow_mnist.py run.sh {{userid}}{{ loginnode }}:~ 
-
ssh  {{userid}}@{{ loginnode }} 
- -!!! Warning "User your own VSC account id" - - Replace {{userid}} with your VSC account id (see ) - -!!! Info - - For more information about transfering files or `scp`, see tranfer files from/to hpc[1]. - -[1]: connecting.md#transfer-files-tofrom-the-hpc - -reference: docs.hpc.ugent.be/Linux/getting_started_copy/#transfer-your-files diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt deleted file mode 100644 index 93e6fdff171..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Linux/getting_started_copy/#wait-for-job-to-be-executed diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt deleted file mode 100644 index 8732e586981..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-access diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt deleted file mode 100644 index 2b1de2be838..00000000000 --- 
a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt +++ /dev/null @@ -1,13 +0,0 @@ -it should be easy to make use of the `ssh` command in a terminal, but the web portal will work too. -The web portal[1] offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). - -See shell access[2] when using the web portal, or -connection to the HPC-UGent infrastructure[3] when using a terminal. - -Make sure you can get to a shell access to the HPC-UGent infrastructure before proceeding with the next steps. - -[1]: web_portal.md -[2]: web_portal.md#shell-access -[3]: connecting.md#first-time-connection-to-the-hpc-infrastructure - -reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-connected diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt deleted file mode 100644 index 4e60f862a0a..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-started diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt deleted file mode 100644 index f7ae9f96226..00000000000 --- a/scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/macOS/getting_started_copy/#inspect-your-results diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt deleted file mode 100644 index 71f384bcf17..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/macOS/getting_started_copy/#next-steps diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt deleted file mode 100644 index d72ba48195a..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/macOS/getting_started_copy/#submitting-a-job diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt deleted file mode 100644 index fce05042ab2..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt +++ /dev/null @@ -1,21 +0,0 @@ - -On 
your local machine you can run: -
curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py
-curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh
-
- -Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC). -
scp tensorflow_mnist.py run.sh {{userid}}{{ loginnode }}:~ 
-
ssh  {{userid}}@{{ loginnode }} 
- -!!! Warning "User your own VSC account id" - - Replace {{userid}} with your VSC account id (see ) - -!!! Info - - For more information about transfering files or `scp`, see tranfer files from/to hpc[1]. - -[1]: connecting.md#transfer-files-tofrom-the-hpc - -reference: docs.hpc.ugent.be/macOS/getting_started_copy/#transfer-your-files diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt deleted file mode 100644 index 2ef8770504b..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/macOS/getting_started_copy/#wait-for-job-to-be-executed diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt deleted file mode 100644 index 874af365704..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-access diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt deleted file mode 100644 index 
ce0b873b2b0..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt +++ /dev/null @@ -1,13 +0,0 @@ -it is recommended to use the web portal. -The web portal[1] offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). - -See shell access[2] when using the web portal, or -connection to the HPC-UGent infrastructure[3] when using a terminal. - -Make sure you can get to a shell access to the HPC-UGent infrastructure before proceeding with the next steps. - -[1]: web_portal.md -[2]: web_portal.md#shell-access -[3]: connecting.md#first-time-connection-to-the-hpc-infrastructure - -reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-connected diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt deleted file mode 100644 index 44d1f17b73b..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-started diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt deleted file mode 100644 index 730fbbc3b74..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt +++ 
/dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Windows/getting_started_copy/#inspect-your-results diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt deleted file mode 100644 index 55df915125a..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Windows/getting_started_copy/#next-steps diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt deleted file mode 100644 index f67d48ece4a..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Windows/getting_started_copy/#submitting-a-job diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt deleted file mode 100644 index dce86fc7cf3..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt +++ /dev/null @@ -1,15 +0,0 @@ - -The HPC-UGent web portal[1] provides a file browser that allows uploading files. 
-For more information see the file browser section[2]. - -Upload both files (`run.sh` and `tensorflow-mnist.py`) to your **home directory** and go back to your shell. - -!!! Info - - As an alternative, you can use WinSCP (see our section[3]) - -[1]: https://login.hpc.ugent.be -[2]: web_portal.md#file-browser -[3]: connecting.md#winscp - -reference: docs.hpc.ugent.be/Windows/getting_started_copy/#transfer-your-files diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt deleted file mode 100644 index bdd7387e379..00000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Windows/getting_started_copy/#wait-for-job-to-be-executed diff --git a/scripts/HPC chatbot preprocessor/if_mangler.py b/scripts/HPC chatbot preprocessor/if_mangler.py deleted file mode 100644 index 46b121610c9..00000000000 --- a/scripts/HPC chatbot preprocessor/if_mangler.py +++ /dev/null @@ -1,72 +0,0 @@ -import re - -# global variable to keep track of latest if-statement scope -is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} - - -def mangle_os_ifs(line): - global is_os - - match = re.search(r'\{%(.*?)%}(.*)', line) - - start_index = 0 - added_length = 0 - - while match: - - constr_match = re.search(r'\{%.*?%}', match.string) - if_match = re.search(r'if ', match.group(1)) - if_os_match = re.search(r'if OS == ', match.group(1)) - endif_match = re.search(r'endif', match.group(1)) - - if endif_match: - if is_os == 2: - line = 
line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] - added_length += 8 - is_os = 0 - if is_os == 3: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] - added_length += 8 - is_os = 2 - elif is_os == 1: - is_os = 2 - elif if_match: - if if_os_match: - if is_os == 2: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] - added_length += 8 - is_os = 3 - else: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] - added_length += 8 - is_os = 2 - else: - if is_os == 2: - is_os = 1 - else: - is_os = 0 - else: - if is_os == 2 or is_os == 3: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] - added_length += 8 - start_index += constr_match.end() - match = re.search(r'\{%(.*?)%}(.*)', match.group(2)) - return line - - -def mangle_ifs(directory, file): - with open(".\\if_mangled_files\\" + file, 'w') as write_file: - with open(directory, 'r') as read_file: - for line in read_file: - new_line = mangle_os_ifs(line) - 
write_file.write(new_line) diff --git a/scripts/HPC chatbot preprocessor/jinja_parser.py b/scripts/HPC chatbot preprocessor/jinja_parser.py deleted file mode 100644 index 603a453ecf7..00000000000 --- a/scripts/HPC chatbot preprocessor/jinja_parser.py +++ /dev/null @@ -1,31 +0,0 @@ -import yaml -from jinja2 import Template, FileSystemLoader, Environment, ChoiceLoader -from if_mangler import mangle_ifs - - -# function that let's jinja do its thing to format the files expect for the os-related if-statements -def jinja_parser(filename, copy_location): - # Read the YAML file - with open('..\\..\\mkdocs\\extra\\gent.yml', 'r') as yml_file: - words_dict = yaml.safe_load(yml_file) - - # ugly fix for index.md error - additional_context = { - 'config': { - 'repo_url': 'https://github.com/hpcugent/vsc_user_docs' - } - } - combined_context = {**words_dict, **additional_context} - - # Mangle the OS-related if-statements - mangle_ifs(copy_location, filename) - - # Use Jinja2 to replace the macros - template_loader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")]) - templateEnv = Environment(loader=template_loader) - template = templateEnv.get_template(filename) - rendered_content = template.render(combined_context) - - # Save the rendered content to a new file - with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file: - output_file.write(rendered_content) From b595301e5bd4b0c19a1beea04affeedb31e00a8c Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 13:12:37 +0200 Subject: [PATCH 018/152] Rename directory --- .../chatbot_parser.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/{HPC chatbot preprocessor => HPC_chatbot_preprocessor}/chatbot_parser.py (100%) diff --git a/scripts/HPC chatbot preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py similarity index 100% rename from scripts/HPC chatbot 
preprocessor/chatbot_parser.py rename to scripts/HPC_chatbot_preprocessor/chatbot_parser.py From 90c8ab760b7ff96d1536d5d85e29a36ea8bf90b5 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 13:26:20 +0200 Subject: [PATCH 019/152] add a main function --- .../chatbot_parser.py | 256 +++++++++--------- 1 file changed, 130 insertions(+), 126 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index b6e42e99ea0..79951a5d0da 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -30,7 +30,7 @@ problem_files = ["getting_started.md", "navigating.md"] # global variable to keep track of latest if-statement scope -is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} +is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} ################### define functions ################### @@ -356,138 +356,142 @@ def make_valid_title(s): return valid_filename +def main(): + global main_title, active_OS_if_states, last_directory, root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, is_linux_tutorial, in_code_block, last_title, curr_dirs, links_generic, links_linux, links_windows, links_macos + # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason + remove_directory_tree(".\\parsed_mds") + remove_directory_tree(".\\copies") + remove_directory_tree(".\\if_mangled_files") -# remove the directories from a previous run of the parser -remove_directory_tree(".\\parsed_mds") -remove_directory_tree(".\\copies") -remove_directory_tree(".\\if_mangled_files") + # make the necessary directories + if not os.path.exists(".\\copies"): + os.mkdir(".\\copies") -# make the 
necessary directories -if not os.path.exists(".\\copies"): - os.mkdir(".\\copies") + if not os.path.exists(".\\copies\\linux"): + os.mkdir(".\\copies\\linux") -if not os.path.exists(".\\copies\\linux"): - os.mkdir(".\\copies\\linux") + if not os.path.exists(".\\parsed_mds"): + os.mkdir(".\\parsed_mds") -if not os.path.exists(".\\parsed_mds"): - os.mkdir(".\\parsed_mds") + if not os.path.exists(".\\if_mangled_files"): + os.mkdir(".\\if_mangled_files") -if not os.path.exists(".\\if_mangled_files"): - os.mkdir(".\\if_mangled_files") + for filenames in [filenames_generic, filenames_linux]: + for filename in filenames.keys(): + ################### define/reset loop specific variables ################### -for filenames in [filenames_generic, filenames_linux]: - for filename in filenames.keys(): - ################### define/reset loop specific variables ################### + # variable that keeps track of whether file is part of the linux tutorial + is_linux_tutorial = bool("linux-tutorial" in filenames[filename]) - # variable that keeps track of whether file is part of the linux tutorial - is_linux_tutorial = bool("linux-tutorial" in filenames[filename]) - - # make a copy of the original file in order to make sure the original does not get altered - if is_linux_tutorial: - copy_file = ".\\copies\\linux\\" + filename - else: - copy_file = ".\\copies\\" + filename - shutil.copyfile(filenames[filename], copy_file) - - # variable that keeps track of the directories that are used to write in at different levels - if is_linux_tutorial: - root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\" - root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\" - root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\" - root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\" - else: - root_dir_generic = ".\\parsed_mds\\generic\\" - root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" - 
root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" - root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" - - # variable for the main title (needed for reference links) - main_title = filename[:-3] - - # variable that keeps track of the directories that are used to write in at different levels - curr_dirs = [filename[:-3] for i in range(5)] - - # variable to keep track whether we're dealing with OS-specific info or not - OS_specific = False - - # variable that keeps track of the latest non-zero level title and corresponding directory - last_title_level = 1 - last_title = None - last_directory = None - last_was_title = False - - # list to keep track of links in the text - links_generic = [] - links_linux = [] - links_windows = [] - links_macos = [] - - # dictionaries to keep track of current OS - active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} - - # variable that shows whether the first title has been reached yet - after_first_title = False - - # variable that is used to be sure that we are detecting titles and not comments from codeblocks - in_code_block = False - - ################### actually parse the md file ################### - - # create directories for the source markdown file - create_directory(root_dir_generic) - create_directory(".\\parsed_mds\\os_specific") - create_directory(root_dir_os_specific_linux) - create_directory(root_dir_os_specific_windows) - create_directory(root_dir_os_specific_macos) - create_directory(root_dir_generic + curr_dirs[0]) - create_directory(root_dir_os_specific_linux + curr_dirs[0]) - create_directory(root_dir_os_specific_windows + curr_dirs[0]) - create_directory(root_dir_os_specific_macos + curr_dirs[0]) - - # process the jinja macros - jinja_parser(filename, copy_file) - - # convert the files without proper markdown layout into markdown using pandoc - if "linux-tutorial" in filenames[filename] and filename in problem_files: - 
pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file) - - # open the file and store line by line in the right file - with open(copy_file, 'r') as readfile: - - for line in readfile: - title_level, title, directory = check_for_title(line) - - detect_in_code_block(line) - - # line is a title with a maximum depth of 4 - if title_level > 0: - last_title_level = title_level - last_title = title - last_directory = directory - after_first_title = True - - # line is not a title - elif after_first_title: - # check for if-statements and write the appropriate lines in the right files - next_action = check_if_statements(line) - while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": - if next_action[0] == "write_text_and_check_extra_message": + # make a copy of the original file in order to make sure the original does not get altered + if is_linux_tutorial: + copy_file = ".\\copies\\linux\\" + filename + else: + copy_file = ".\\copies\\" + filename + shutil.copyfile(filenames[filename], copy_file) + + # variable that keeps track of the directories that are used to write in at different levels + if is_linux_tutorial: + root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\" + root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\" + root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\" + root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\" + else: + root_dir_generic = ".\\parsed_mds\\generic\\" + root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" + root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" + root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" + + # variable for the main title (needed for reference links) + main_title = filename[:-3] + + # variable that keeps track of the directories that are used to write in at different levels + curr_dirs = [filename[:-3] for i 
in range(5)] + + # variable to keep track whether we're dealing with OS-specific info or not + OS_specific = False + + # variable that keeps track of the latest non-zero level title and corresponding directory + last_title_level = 1 + last_title = None + last_directory = None + last_was_title = False + + # list to keep track of links in the text + links_generic = [] + links_linux = [] + links_windows = [] + links_macos = [] + + # dictionaries to keep track of current OS + active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} + + # variable that shows whether the first title has been reached yet + after_first_title = False + + # variable that is used to be sure that we are detecting titles and not comments from codeblocks + in_code_block = False + + ################### actually parse the md file ################### + + # create directories for the source markdown file + create_directory(root_dir_generic) + create_directory(".\\parsed_mds\\os_specific") + create_directory(root_dir_os_specific_linux) + create_directory(root_dir_os_specific_windows) + create_directory(root_dir_os_specific_macos) + create_directory(root_dir_generic + curr_dirs[0]) + create_directory(root_dir_os_specific_linux + curr_dirs[0]) + create_directory(root_dir_os_specific_windows + curr_dirs[0]) + create_directory(root_dir_os_specific_macos + curr_dirs[0]) + + # process the jinja macros + jinja_parser(filename, copy_file) + + # convert the files without proper markdown layout into markdown using pandoc + if "linux-tutorial" in filenames[filename] and filename in problem_files: + pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file) + + # open the file and store line by line in the right file + with open(copy_file, 'r') as readfile: + + for line in readfile: + title_level, title, directory = check_for_title(line) + + detect_in_code_block(line) + + # line is a title with a maximum depth of 4 + if title_level > 0: + last_title_level = title_level + 
last_title = title + last_directory = directory + after_first_title = True + + # line is not a title + elif after_first_title: + # check for if-statements and write the appropriate lines in the right files + next_action = check_if_statements(line) + while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": + if next_action[0] == "write_text_and_check_extra_message": + choose_and_write_to_file(next_action[2]) + next_action = check_if_statements(next_action[1]) + + if next_action[0] == "write_text": choose_and_write_to_file(next_action[2]) - next_action = check_if_statements(next_action[1]) - - if next_action[0] == "write_text": - choose_and_write_to_file(next_action[2]) - - # write end of file for the last file - write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", - links_linux, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", - links_windows, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", - links_macos, is_linux_tutorial) - -remove_directory_tree(".\\copies") -remove_directory_tree(".\\if_mangled_files") + + # write end of file for the last file + write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial) + write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", + links_linux, is_linux_tutorial) + write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", + links_windows, is_linux_tutorial) + write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", + links_macos, is_linux_tutorial) + + 
remove_directory_tree(".\\copies") + remove_directory_tree(".\\if_mangled_files") + + +main() # TODO: reconsider maximum depth to be detected as title (now at four) # TODO: adapt script to be used from command line From b8ae7066d089202d8554ae5f00e98a9281d7c25d Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 14:16:27 +0200 Subject: [PATCH 020/152] make file paths non os-specific --- .../chatbot_parser.py | 106 +++++++++--------- 1 file changed, 56 insertions(+), 50 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 79951a5d0da..f67d0f0d529 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -12,7 +12,7 @@ ################### define global variables ################### # variable that keeps track of the source directories -source_directories = ["..\\..\\mkdocs\\docs\\HPC\\", "..\\..\\mkdocs\\docs\\HPC\\linux-tutorial"] +source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"), os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")] # list of all the filenames filenames_generic = {} @@ -67,22 +67,21 @@ def check_for_title(curr_line): return 0, None, None else: if last_title is not None: - write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", + write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, is_linux_tutorial) + write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux", links_linux, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", + write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), 
"Windows", links_windows, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", + write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS", links_macos, is_linux_tutorial) reset_link_lists() - curr_dirs[logic_output] = curr_dirs[logic_output - 1] + "\\" + make_valid_title( - curr_line[logic_output + 1:-1].replace(' ', '-')) + curr_dirs[logic_output] = os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-'))) - create_directory(root_dir_generic + curr_dirs[logic_output]) - create_directory(root_dir_os_specific_linux + curr_dirs[logic_output]) - create_directory(root_dir_os_specific_windows + curr_dirs[logic_output]) - create_directory(root_dir_os_specific_macos + curr_dirs[logic_output]) + create_directory(os.path.join(root_dir_generic, curr_dirs[logic_output])) + create_directory(os.path.join(root_dir_os_specific_linux, curr_dirs[logic_output])) + create_directory(os.path.join(root_dir_os_specific_windows, curr_dirs[logic_output])) + create_directory(os.path.join(root_dir_os_specific_macos, curr_dirs[logic_output])) update_lower_curr_dir(curr_dirs[logic_output], logic_output) return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output] @@ -127,8 +126,12 @@ def replace_markdown_markers(curr_line, linklist): # function that let's jinja do its thing to format the files expect for the os-related if-statements def jinja_parser(filename, copy_location): + + # YAML file location + yml_file_path = os.path.join('..', '..', 'mkdocs', 'extra', 'gent.yml') + # Read the YAML file - with open('..\\..\\mkdocs\\extra\\gent.yml', 'r') as yml_file: + with open(yml_file_path, 'r') as yml_file: words_dict = yaml.safe_load(yml_file) # ugly fix for index.md error @@ -143,7 +146,7 @@ def jinja_parser(filename, copy_location): mangle_ifs(copy_location, filename) # Use 
Jinja2 to replace the macros - template_loader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")]) + template_loader = ChoiceLoader([FileSystemLoader(searchpath='if_mangled_files'), FileSystemLoader(searchpath=os.path.join("..", "..", "mkdocs", "docs", "HPC"))]) templateEnv = Environment(loader=template_loader) template = templateEnv.get_template(filename) rendered_content = template.render(combined_context) @@ -214,7 +217,7 @@ def mangle_os_ifs(line): def mangle_ifs(directory, file): - with open(".\\if_mangled_files\\" + file, 'w') as write_file: + with open(os.path.join("if_mangled_files", file), 'w') as write_file: with open(directory, 'r') as read_file: for line in read_file: new_line = mangle_os_ifs(line) @@ -305,13 +308,13 @@ def choose_and_write_to_file(curr_line): # check that the line is part of the website for gent if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and \ active_OS_if_states["macos"] == "inactive": - write_text_to_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", curr_line) + write_text_to_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), curr_line) if active_OS_if_states["linux"] == "active": - write_text_to_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", curr_line) + write_text_to_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), curr_line) if active_OS_if_states["windows"] == "active": - write_text_to_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", curr_line) + write_text_to_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), curr_line) if active_OS_if_states["macos"] == "active": - write_text_to_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", curr_line) + write_text_to_file(os.path.join(root_dir_os_specific_macos, 
last_directory, last_title + ".txt"), curr_line) # function that adds a reference link at the end of every txt file @@ -356,25 +359,26 @@ def make_valid_title(s): return valid_filename + def main(): global main_title, active_OS_if_states, last_directory, root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, is_linux_tutorial, in_code_block, last_title, curr_dirs, links_generic, links_linux, links_windows, links_macos # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason - remove_directory_tree(".\\parsed_mds") - remove_directory_tree(".\\copies") - remove_directory_tree(".\\if_mangled_files") + remove_directory_tree("parsed_mds") + remove_directory_tree("copies") + remove_directory_tree("if_mangled_files") # make the necessary directories - if not os.path.exists(".\\copies"): - os.mkdir(".\\copies") + if not os.path.exists("copies"): + os.mkdir("copies") - if not os.path.exists(".\\copies\\linux"): - os.mkdir(".\\copies\\linux") + if not os.path.exists(os.path.join("copies", "linux")): + os.mkdir(os.path.join("copies", "linux")) - if not os.path.exists(".\\parsed_mds"): - os.mkdir(".\\parsed_mds") + if not os.path.exists("parsed_mds"): + os.mkdir("parsed_mds") - if not os.path.exists(".\\if_mangled_files"): - os.mkdir(".\\if_mangled_files") + if not os.path.exists("if_mangled_files"): + os.mkdir("if_mangled_files") for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): @@ -385,22 +389,22 @@ def main(): # make a copy of the original file in order to make sure the original does not get altered if is_linux_tutorial: - copy_file = ".\\copies\\linux\\" + filename + copy_file = os.path.join("copies", "linux", filename) else: - copy_file = ".\\copies\\" + filename + copy_file = os.path.join("copies", filename) shutil.copyfile(filenames[filename], copy_file) # variable that keeps track of the directories that are used to write in at 
different levels if is_linux_tutorial: - root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\" - root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\" - root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\" - root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\" + root_dir_generic = os.path.join("parsed_mds", "generic", "linux_tutorial") + root_dir_os_specific_linux = os.path.join("parsed_mds", "os_specific", "linux", "linux_tutorial") + root_dir_os_specific_windows = os.path.join("parsed_mds", "os_specific", "windows", "linux_tutorial") + root_dir_os_specific_macos = os.path.join("parsed_mds", "os_specific", "macos", "linux_tutorial") else: - root_dir_generic = ".\\parsed_mds\\generic\\" - root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" - root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" - root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" + root_dir_generic = os.path.join("parsed_mds", "generic") + root_dir_os_specific_linux = os.path.join("parsed_mds", "os_specific", "linux") + root_dir_os_specific_windows = os.path.join("parsed_mds", "os_specific", "windows") + root_dir_os_specific_macos = os.path.join("parsed_mds", "os_specific", "macos") # variable for the main title (needed for reference links) main_title = filename[:-3] @@ -436,14 +440,14 @@ def main(): # create directories for the source markdown file create_directory(root_dir_generic) - create_directory(".\\parsed_mds\\os_specific") + create_directory(os.path.join("parsed_mds", "os_specific")) create_directory(root_dir_os_specific_linux) create_directory(root_dir_os_specific_windows) create_directory(root_dir_os_specific_macos) - create_directory(root_dir_generic + curr_dirs[0]) - create_directory(root_dir_os_specific_linux + curr_dirs[0]) - create_directory(root_dir_os_specific_windows + curr_dirs[0]) - create_directory(root_dir_os_specific_macos + 
curr_dirs[0]) + create_directory(os.path.join(root_dir_generic, curr_dirs[0])) + create_directory(os.path.join(root_dir_os_specific_linux, curr_dirs[0])) + create_directory(os.path.join(root_dir_os_specific_windows, curr_dirs[0])) + create_directory(os.path.join(root_dir_os_specific_macos, curr_dirs[0])) # process the jinja macros jinja_parser(filename, copy_file) @@ -480,16 +484,18 @@ def main(): choose_and_write_to_file(next_action[2]) # write end of file for the last file - write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", + write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, + is_linux_tutorial) + write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux", links_linux, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", + write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), + "Windows", links_windows, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", + write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS", links_macos, is_linux_tutorial) - remove_directory_tree(".\\copies") - remove_directory_tree(".\\if_mangled_files") + remove_directory_tree("copies") + remove_directory_tree("if_mangled_files") main() From b7514973facd2edeb274161b67ae4eee53140229 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 14:54:41 +0200 Subject: [PATCH 021/152] use docstrings to document the functions --- .../chatbot_parser.py | 143 +++++++++++++++--- 1 file changed, 125 insertions(+), 18 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py 
b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index f67d0f0d529..f5596fd5b1d 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -34,14 +34,24 @@ ################### define functions ################### -# function that removes the previous file structure before starting the process of making a new one def remove_directory_tree(old_directory): + """ + function that removes a full directory tree + + :param old_directory: the directory to be removed + :return: + """ if os.path.exists(old_directory): shutil.rmtree(old_directory) -# function that checks whether the current line has a title of level 4 at maximum (returns the level of the title or 0 if the line is not a title) def check_for_title_logic(curr_line): + """ + function that checks whether the current line has a title of level 4 at maximum (returns the level of the title or 0 if the line is not a title) + + :param curr_line: the line to be checked for a title + :return: depth of the title + """ global curr_dirs match = re.match(r'^#+ ', curr_line) if match and len(match.group(0)) <= 5: @@ -50,8 +60,12 @@ def check_for_title_logic(curr_line): return 0 -# function that resets the contents of the link_lists def reset_link_lists(): + """ + function that resets the contents of the link_lists + + :return: + """ global links_generic, links_linux, links_windows, links_macos links_generic = [] links_linux = [] @@ -59,8 +73,15 @@ def reset_link_lists(): links_macos = [] -# function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables def check_for_title(curr_line): + """ + function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables + + :param curr_line: the line to be checked for a title + :return: the depth of the title + :return: the title found in the line if any + :return: the new directory in which the next 
file will be written + """ global curr_dirs, last_title, in_code_block logic_output = check_for_title_logic(curr_line) if logic_output == 0 or in_code_block: @@ -87,28 +108,51 @@ def check_for_title(curr_line): return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output] -# function used to detect codeblocks and make sure the comments don't get detected as titles def detect_in_code_block(curr_line): + """ + function used to detect codeblocks and make sure the comments don't get detected as titles + + :param curr_line: the line in which the start or end of a codeblock needs to be detected + :return: + """ global in_code_block if '```' in curr_line or (('
' in curr_line) ^ ('
' in curr_line)): in_code_block = not in_code_block -# function that creates directories if needed def create_directory(new_directory): + """ + function that creates new directories + + :param new_directory: directory to be created + :return: + """ if not os.path.exists(new_directory): os.mkdir(new_directory) -# function that updates the curr_dir variables when needed def update_lower_curr_dir(curr_directory, level): + """ + function that updates the curr_dir variables when needed + + :param curr_directory: the current directory to which the lower level current directories need to be updated + :param level: the depth of the current directory + :return: + """ global curr_dirs for i in range(level + 1, 4): curr_dirs[i] = curr_directory -# function that replaces certain markdown structures with the equivalent used on the website def replace_markdown_markers(curr_line, linklist): + """ + function that replaces certain markdown structures with the equivalent used on the website + + :param curr_line: the current line on which markdown structures need to be replaced + :param linklist: the list used to store links that need to be printed at the end of the file + :return curr_line: the adapted current line + :return linklist: the updated linklist + """ # replace links with a reference matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) if matches: @@ -124,9 +168,14 @@ def replace_markdown_markers(curr_line, linklist): return curr_line, linklist -# function that let's jinja do its thing to format the files expect for the os-related if-statements def jinja_parser(filename, copy_location): + """ + function that let's jinja do its thing to format the files except for the os-related if-statements + :param filename: the name of the file that needs to be formatted using jinja + :param copy_location: the location of the file that needs to be formatted using jinja + :return: + """ # YAML file location yml_file_path = os.path.join('..', '..', 'mkdocs', 'extra', 'gent.yml') @@ 
-157,6 +206,12 @@ def jinja_parser(filename, copy_location): def mangle_os_ifs(line): + """ + function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure. + + :param line: the current line to check for os-related if-statements + :return line: the modified line with mangled os-related if-statements + """ global is_os match = re.search(r'\{%(.*?)%}(.*)', line) @@ -217,6 +272,13 @@ def mangle_os_ifs(line): def mangle_ifs(directory, file): + """ + function that writes the if-mangled version of a file to a location where the jinja parser will use it + + :param directory: the directory of the file to be if mangled + :param file: the filename of the file to be mangled + :return: + """ with open(os.path.join("if_mangled_files", file), 'w') as write_file: with open(directory, 'r') as read_file: for line in read_file: @@ -224,8 +286,19 @@ def mangle_ifs(directory, file): write_file.write(new_line) -# function that checks for if-statements def check_if_statements(curr_line): + """ + function that checks for if-statements + + :param curr_line: the line to be checked for if-statements to build the directory structure + :return: the next action to be done with the line: + "done": An if-statement has been found at the start of the line, the active os list has been updated, processing of the current line is finished and a following line can be processed. + "check_extra_message": An if-statement has been found at the start of the line, the active os list has been updated, more text has been detected after the if-statement that also needs to be checked. + "write_text": No if-statement has been found, write the current line to a file (can also be part of the current line) + "write_text_and_check_extra_message": An if statement has been found not at the start of the line. Firstly, write the text up until the if-statement to a file, then check the rest of the line. 
+ :return: the extra message to be checked, if any + :return: the text to be written to the file, if any + """ # check whether the first part of the line contains information wrt if-statements match = re.search(r'^\{-if-%([^%]*)%-if-}(.*)', curr_line) @@ -288,8 +361,14 @@ def check_if_statements(curr_line): return "write_text", None, curr_line -# function that writes a line to a file def write_text_to_file(file_name, curr_line): + """ + function that writes a line to a file + + :param file_name: target file to write the line to + :param curr_line: line to be written to the file + :return: + """ global links_generic, links_linux, links_windows, links_macos with open(file_name, "a") as write_file: if "generic" in file_name: @@ -303,8 +382,13 @@ def write_text_to_file(file_name, curr_line): write_file.write(curr_line) -# function that decides what file to write text to def choose_and_write_to_file(curr_line): + """ + function that decides what file to write text to + + :param curr_line: line to be written to a file + :return: + """ # check that the line is part of the website for gent if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and \ active_OS_if_states["macos"] == "inactive": @@ -317,14 +401,28 @@ def choose_and_write_to_file(curr_line): write_text_to_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), curr_line) -# function that adds a reference link at the end of every txt file def add_reference_link(file_location, reference_link): + """ + function that adds a reference link at the end of every txt file + + :param file_location: the file that needs a reference link + :param reference_link: the reference link that needs to be written + :return: + """ with open(file_location, 'a') as write_file: write_file.write("\nreference: " + reference_link + "\n") -# function that adds the links that should be at the end of a file def write_end_of_file(file_location, OS, linklist, 
is_linux_tutorial_): + """ + function that adds the links that should be at the end of a file + + :param file_location: the location of the file + :param OS: the OS of the file + :param linklist: the links that should be at the end of the file + :param is_linux_tutorial_: boolean indicating whether the file is part of the linux tutorial + :return: + """ if len(OS) > 0: OS = OS + "/" @@ -343,16 +441,21 @@ def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_): add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-')) -# function that makes sure all titles can be used as valid filenames -def make_valid_title(s): +def make_valid_title(title): + """ + function that makes sure all titles can be used as valid filenames + + :param title: the string that will be used as title and filename + :return valid_filename: the adapted title that can be used as filename + """ # Define a regex pattern for invalid characters on both Windows and Linux invalid_chars = r'[<>:"/\\|?*\0()]' # get rid of extra information between {} brackets - s = re.sub(r'\{.*?}', '', s) + s = re.sub(r'\{.*?}', '', title) # Remove invalid characters - valid_filename = re.sub(invalid_chars, '', s) + valid_filename = re.sub(invalid_chars, '', title) # Strip leading/trailing whitespace valid_filename = valid_filename.strip().strip('-') @@ -361,6 +464,10 @@ def make_valid_title(s): def main(): + """ + main function + :return: + """ global main_title, active_OS_if_states, last_directory, root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, is_linux_tutorial, in_code_block, last_title, curr_dirs, links_generic, links_linux, links_windows, links_macos # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason remove_directory_tree("parsed_mds") From 
0f8eb5dfa6e7cc8adee238fd7467dbc9247e0012 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 15:15:56 +0200 Subject: [PATCH 022/152] rewrite the if-mangler to make it more readable --- .../chatbot_parser.py | 35 +++++++------------ 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index f5596fd5b1d..4f1865b9411 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -225,35 +225,27 @@ def mangle_os_ifs(line): if_match = re.search(r'if ', match.group(1)) if_os_match = re.search(r'if OS == ', match.group(1)) endif_match = re.search(r'endif', match.group(1)) + pos_first_mangle = constr_match.start() + start_index + added_length + 1 + pos_second_mangle = constr_match.end() + start_index + added_length - 1 + # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these if endif_match: - if is_os == 2: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] - added_length += 8 - is_os = 0 - if is_os == 3: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] + if is_os == 2 or is_os == 3: + line = line[:pos_first_mangle] + "-if-" + line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:] added_length += 8 - is_os = 2 + if is_os == 2: + is_os = 0 + elif is_os == 3: + is_os = 2 elif is_os == 
1: is_os = 2 elif if_match: if if_os_match: + line = line[:pos_first_mangle] + "-if-" + line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:] + added_length += 8 if is_os == 2: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] - added_length += 8 is_os = 3 else: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] - added_length += 8 is_os = 2 else: if is_os == 2: @@ -262,10 +254,9 @@ def mangle_os_ifs(line): is_os = 0 else: if is_os == 2 or is_os == 3: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[constr_match.end() + start_index + added_length - 1:] added_length += 8 + start_index += constr_match.end() match = re.search(r'\{%(.*?)%}(.*)', match.group(2)) return line From 9938e921674d5e46a3917feef8a780f22b427440 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 16:09:23 +0200 Subject: [PATCH 023/152] got rid of most global variables --- .../chatbot_parser.py | 162 +++++++----------- 1 file changed, 63 insertions(+), 99 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 4f1865b9411..e7a88e4de7c 
100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -29,9 +29,6 @@ # some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc problem_files = ["getting_started.md", "navigating.md"] -# global variable to keep track of latest if-statement scope -is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} - ################### define functions ################### def remove_directory_tree(old_directory): @@ -45,57 +42,48 @@ def remove_directory_tree(old_directory): shutil.rmtree(old_directory) -def check_for_title_logic(curr_line): - """ - function that checks whether the current line has a title of level 4 at maximum (returns the level of the title or 0 if the line is not a title) - - :param curr_line: the line to be checked for a title - :return: depth of the title - """ - global curr_dirs - match = re.match(r'^#+ ', curr_line) - if match and len(match.group(0)) <= 5: - return len(match.group(0)) - 1 - else: - return 0 - - -def reset_link_lists(): - """ - function that resets the contents of the link_lists - - :return: - """ - global links_generic, links_linux, links_windows, links_macos - links_generic = [] - links_linux = [] - links_windows = [] - links_macos = [] - - -def check_for_title(curr_line): +def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs, is_linux_tutorial_, in_code_block_): """ function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables :param curr_line: the line to be checked for a title + :param main_title: the main title of the file, needed in the case where a file is finished + :param last_directory: the most recently encountered directory + :param last_title: the most recently encountered title + :param curr_dirs: the most recent 
directories at each title level + :param is_linux_tutorial_: boolean to indicate whether the current file is part of the linux tutorial + :param in_code_block_: boolean to indicate whether the current line is part of a codeblock :return: the depth of the title :return: the title found in the line if any :return: the new directory in which the next file will be written """ - global curr_dirs, last_title, in_code_block - logic_output = check_for_title_logic(curr_line) - if logic_output == 0 or in_code_block: - return 0, None, None + global links_generic, links_linux, links_windows, links_macos + + # detect titles + match = re.match(r'^#+ ', curr_line) + if match and len(match.group(0)) <= 5: + logic_output = len(match.group(0)) - 1 + else: + logic_output = 0 + + # make necessary changes if a title has been detected + if logic_output == 0 or in_code_block_: + return 0, None, None, curr_dirs else: if last_title is not None: - write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, is_linux_tutorial) + write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, is_linux_tutorial_, main_title, last_title) write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux", - links_linux, is_linux_tutorial) + links_linux, is_linux_tutorial_, main_title, last_title) write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), "Windows", - links_windows, is_linux_tutorial) + links_windows, is_linux_tutorial_, main_title, last_title) write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS", - links_macos, is_linux_tutorial) - reset_link_lists() + links_macos, is_linux_tutorial_, main_title, last_title) + + # reset the link lists + links_generic = [] + links_linux = [] + links_windows = [] + links_macos = [] curr_dirs[logic_output] = 
os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-'))) @@ -104,20 +92,11 @@ def check_for_title(curr_line): create_directory(os.path.join(root_dir_os_specific_windows, curr_dirs[logic_output])) create_directory(os.path.join(root_dir_os_specific_macos, curr_dirs[logic_output])) - update_lower_curr_dir(curr_dirs[logic_output], logic_output) - return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output] - - -def detect_in_code_block(curr_line): - """ - function used to detect codeblocks and make sure the comments don't get detected as titles + # update the lower order current directories + for i in range(logic_output + 1, 4): + curr_dirs[i] = curr_dirs[logic_output] - :param curr_line: the line in which the start or end of a codeblock needs to be detected - :return: - """ - global in_code_block - if '```' in curr_line or (('
' in curr_line) ^ ('
' in curr_line)): - in_code_block = not in_code_block + return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs def create_directory(new_directory): @@ -131,19 +110,6 @@ def create_directory(new_directory): os.mkdir(new_directory) -def update_lower_curr_dir(curr_directory, level): - """ - function that updates the curr_dir variables when needed - - :param curr_directory: the current directory to which the lower level current directories need to be updated - :param level: the depth of the current directory - :return: - """ - global curr_dirs - for i in range(level + 1, 4): - curr_dirs[i] = curr_directory - - def replace_markdown_markers(curr_line, linklist): """ function that replaces certain markdown structures with the equivalent used on the website @@ -205,14 +171,14 @@ def jinja_parser(filename, copy_location): output_file.write(rendered_content) -def mangle_os_ifs(line): +def mangle_os_ifs(line, is_os): """ function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure. :param line: the current line to check for os-related if-statements + :param is_os: boolean keep track of the current os-state of the if-statements. 
Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} :return line: the modified line with mangled os-related if-statements """ - global is_os match = re.search(r'\{%(.*?)%}(.*)', line) @@ -259,7 +225,7 @@ def mangle_os_ifs(line): start_index += constr_match.end() match = re.search(r'\{%(.*?)%}(.*)', match.group(2)) - return line + return line, is_os def mangle_ifs(directory, file): @@ -270,18 +236,22 @@ def mangle_ifs(directory, file): :param file: the filename of the file to be mangled :return: """ + # variable to keep track of latest if-statement scope + is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} + with open(os.path.join("if_mangled_files", file), 'w') as write_file: with open(directory, 'r') as read_file: for line in read_file: - new_line = mangle_os_ifs(line) + new_line, is_os = mangle_os_ifs(line, is_os) write_file.write(new_line) -def check_if_statements(curr_line): +def check_if_statements(curr_line, active_OS_if_states): """ function that checks for if-statements :param curr_line: the line to be checked for if-statements to build the directory structure + :param active_OS_if_states: dictionary keeping track of the active OS states according to the if-statements :return: the next action to be done with the line: "done": An if-statement has been found at the start of the line, the active os list has been updated, processing of the current line is finished and a following line can be processed. "check_extra_message": An if-statement has been found at the start of the line, the active os list has been updated, more text has been detected after the if-statement that also needs to be checked. 
@@ -373,11 +343,14 @@ def write_text_to_file(file_name, curr_line): write_file.write(curr_line) -def choose_and_write_to_file(curr_line): +def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, last_title): """ function that decides what file to write text to :param curr_line: line to be written to a file + :param active_OS_if_states: dictionary keeping track of which OSes are active according to the if-statements + :param last_directory: most recently made directory + :param last_title: the most recently encountered title :return: """ # check that the line is part of the website for gent @@ -392,19 +365,7 @@ def choose_and_write_to_file(curr_line): write_text_to_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), curr_line) -def add_reference_link(file_location, reference_link): - """ - function that adds a reference link at the end of every txt file - - :param file_location: the file that needs a reference link - :param reference_link: the reference link that needs to be written - :return: - """ - with open(file_location, 'a') as write_file: - write_file.write("\nreference: " + reference_link + "\n") - - -def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_): +def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_, main_title, last_title): """ function that adds the links that should be at the end of a file @@ -412,6 +373,8 @@ def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_): :param OS: the OS of the file :param linklist: the links that should be at the end of the file :param is_linux_tutorial_: boolean indicating whether the file is part of the linux tutorial + :param main_title: the main title of the file, to be used in the reference link + :param last_title: the most recently encountered title :return: """ if len(OS) > 0: @@ -429,7 +392,8 @@ def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_): linux_part = "" # finally add the 
reference link - add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-')) + with open(file_location, 'a') as write_file: + write_file.write("\nreference: docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-') + "\n") def make_valid_title(title): @@ -459,7 +423,7 @@ def main(): main function :return: """ - global main_title, active_OS_if_states, last_directory, root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, is_linux_tutorial, in_code_block, last_title, curr_dirs, links_generic, links_linux, links_windows, links_macos + global root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, links_generic, links_linux, links_windows, links_macos # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason remove_directory_tree("parsed_mds") remove_directory_tree("copies") @@ -558,9 +522,10 @@ def main(): with open(copy_file, 'r') as readfile: for line in readfile: - title_level, title, directory = check_for_title(line) + title_level, title, directory, curr_dirs = check_for_title(line, main_title, last_directory, last_title, curr_dirs, is_linux_tutorial, in_code_block) - detect_in_code_block(line) + if '```' in line or (('
' in line) ^ ('
' in line)): + in_code_block = not in_code_block # line is a title with a maximum depth of 4 if title_level > 0: @@ -572,25 +537,24 @@ def main(): # line is not a title elif after_first_title: # check for if-statements and write the appropriate lines in the right files - next_action = check_if_statements(line) + next_action = check_if_statements(line, active_OS_if_states) while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": if next_action[0] == "write_text_and_check_extra_message": - choose_and_write_to_file(next_action[2]) - next_action = check_if_statements(next_action[1]) + choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title) + next_action = check_if_statements(next_action[1], active_OS_if_states) if next_action[0] == "write_text": - choose_and_write_to_file(next_action[2]) + choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title) # write end of file for the last file write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, - is_linux_tutorial) + is_linux_tutorial, main_title, last_title) write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux", - links_linux, is_linux_tutorial) + links_linux, is_linux_tutorial, main_title, last_title) write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), - "Windows", - links_windows, is_linux_tutorial) + "Windows", links_windows, is_linux_tutorial, main_title, last_title) write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS", - links_macos, is_linux_tutorial) + links_macos, is_linux_tutorial, main_title, last_title) remove_directory_tree("copies") remove_directory_tree("if_mangled_files") From 508b22c7b3a485f8fdb64059a45d2ee9dfdd4f04 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 16:30:48 +0200 Subject: 
[PATCH 024/152] fixed some issues with if statements --- .../chatbot_parser.py | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index e7a88e4de7c..bbab687bd39 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -12,7 +12,7 @@ ################### define global variables ################### # variable that keeps track of the source directories -source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"), os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")] +source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC")]#, os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")] # list of all the filenames filenames_generic = {} @@ -189,7 +189,7 @@ def mangle_os_ifs(line, is_os): constr_match = re.search(r'\{%.*?%}', match.string) if_match = re.search(r'if ', match.group(1)) - if_os_match = re.search(r'if OS == ', match.group(1)) + if_os_match = re.search(r'if OS ', match.group(1)) endif_match = re.search(r'endif', match.group(1)) pos_first_mangle = constr_match.start() + start_index + added_length + 1 pos_second_mangle = constr_match.end() + start_index + added_length - 1 @@ -228,18 +228,18 @@ def mangle_os_ifs(line, is_os): return line, is_os -def mangle_ifs(directory, file): +def mangle_ifs(directory, filename): """ function that writes the if-mangled version of a file to a location where the jinja parser will use it :param directory: the directory of the file to be if mangled - :param file: the filename of the file to be mangled + :param filename: the filename of the file to be mangled :return: """ # variable to keep track of latest if-statement scope is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} - with 
open(os.path.join("if_mangled_files", file), 'w') as write_file: + with open(os.path.join("if_mangled_files", filename), 'w') as write_file: with open(directory, 'r') as read_file: for line in read_file: new_line, is_os = mangle_os_ifs(line, is_os) @@ -261,17 +261,17 @@ def check_if_statements(curr_line, active_OS_if_states): :return: the text to be written to the file, if any """ # check whether the first part of the line contains information wrt if-statements - match = re.search(r'^\{-if-%([^%]*)%-if-}(.*)', curr_line) + match = re.search(r'^\{-if-%(.*?)%-if-}(.*)', curr_line) # check whether the line contains information wrt if-statements that is not in its first part - match_large = re.search(r'^(.*)(\{-if-%[^%]*%-if-})(.*)', curr_line) + match_large = re.search(r'^(.*)(\{-if-%.*?%-if-})(.*)', curr_line) if match: content = match.group(1) # new if-statement wrt OS if re.search(r'if OS == ', content): - OS = content[9:-1] + OS = content.split()[-1] # set new active OS active_OS_if_states[OS] = "active" @@ -281,6 +281,17 @@ def check_if_statements(curr_line, active_OS_if_states): if other_OS != OS and active_OS_if_states[other_OS] == "active": active_OS_if_states[other_OS] = "inactive" + elif re.search(r'if OS != ', content): + OS = content.split()[-1] + + # set new active OS + active_OS_if_states[OS] = "inactive" + + # set other inactive ones on active + for other_OS in active_OS_if_states.keys(): + if other_OS != OS and active_OS_if_states[other_OS] == "inactive": + active_OS_if_states[other_OS] = "active" + # endif statement wrt OS elif re.search(r'endif', content): if str(1) in active_OS_if_states.values(): @@ -556,8 +567,8 @@ def main(): write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS", links_macos, is_linux_tutorial, main_title, last_title) - remove_directory_tree("copies") - remove_directory_tree("if_mangled_files") + # remove_directory_tree("copies") + # remove_directory_tree("if_mangled_files") 
main() From a25ce2dc8bf7ffb9f732b6eb7e796ad54cd724b5 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 16:36:04 +0200 Subject: [PATCH 025/152] fixed some issues with if statements --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index bbab687bd39..fb7d8a8b176 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -12,7 +12,7 @@ ################### define global variables ################### # variable that keeps track of the source directories -source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC")]#, os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")] +source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"), os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")] # list of all the filenames filenames_generic = {} From 80d0535a74564ecd18f626d5c18568cc17c7d7fc Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 17:03:07 +0200 Subject: [PATCH 026/152] got rid of all global variables --- .../chatbot_parser.py | 125 +++++++++--------- 1 file changed, 65 insertions(+), 60 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index fb7d8a8b176..eb30cb7fb6a 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -5,30 +5,6 @@ import yaml from jinja2 import FileSystemLoader, Environment, ChoiceLoader -# variables for analytics -succeeded = 0 -failed = 0 - -################### define global variables ################### - -# variable that keeps track of the source directories -source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"), os.path.join("..", "..", "mkdocs", "docs", "HPC", 
"linux-tutorial")] - -# list of all the filenames -filenames_generic = {} -filenames_linux = {} -for source_directory in source_directories: - all_items = os.listdir(source_directory) - files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - for file in files: - if "linux-tutorial" in source_directory: - filenames_linux[file] = os.path.join(source_directory, file) - else: - filenames_generic[file] = os.path.join(source_directory, file) - -# some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc -problem_files = ["getting_started.md", "navigating.md"] - ################### define functions ################### def remove_directory_tree(old_directory): @@ -42,7 +18,7 @@ def remove_directory_tree(old_directory): shutil.rmtree(old_directory) -def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs, is_linux_tutorial_, in_code_block_): +def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs, root_dirs, link_lists, is_linux_tutorial_, in_code_block_): """ function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables @@ -51,13 +27,15 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs :param last_directory: the most recently encountered directory :param last_title: the most recently encountered title :param curr_dirs: the most recent directories at each title level + :param root_dirs: a list containing the root directories + param link_lists: a list containing all four link_lists with the links that will be printed at the bottom of a file :param is_linux_tutorial_: boolean to indicate whether the current file is part of the linux tutorial :param in_code_block_: boolean to indicate whether the current line is part of a codeblock :return: the depth of the title :return: the title found in the line if any :return: 
the new directory in which the next file will be written + :return link_lists: updated link_lists """ - global links_generic, links_linux, links_windows, links_macos # detect titles match = re.match(r'^#+ ', curr_line) @@ -68,35 +46,35 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs # make necessary changes if a title has been detected if logic_output == 0 or in_code_block_: - return 0, None, None, curr_dirs + return 0, None, None, curr_dirs, link_lists else: if last_title is not None: - write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, is_linux_tutorial_, main_title, last_title) - write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux", - links_linux, is_linux_tutorial_, main_title, last_title) - write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), "Windows", - links_windows, is_linux_tutorial_, main_title, last_title) - write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS", - links_macos, is_linux_tutorial_, main_title, last_title) + write_end_of_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), "", link_lists[0], is_linux_tutorial_, main_title, last_title) + write_end_of_file(os.path.join(root_dirs[1], last_directory, last_title + ".txt"), "Linux", + link_lists[1], is_linux_tutorial_, main_title, last_title) + write_end_of_file(os.path.join(root_dirs[2], last_directory, last_title + ".txt"), "Windows", + link_lists[2], is_linux_tutorial_, main_title, last_title) + write_end_of_file(os.path.join(root_dirs[3], last_directory, last_title + ".txt"), "macOS", + link_lists[3], is_linux_tutorial_, main_title, last_title) # reset the link lists - links_generic = [] - links_linux = [] - links_windows = [] - links_macos = [] + link_lists[0] = [] + link_lists[1] = [] + link_lists[2] = [] + link_lists[3] = [] 
curr_dirs[logic_output] = os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-'))) - create_directory(os.path.join(root_dir_generic, curr_dirs[logic_output])) - create_directory(os.path.join(root_dir_os_specific_linux, curr_dirs[logic_output])) - create_directory(os.path.join(root_dir_os_specific_windows, curr_dirs[logic_output])) - create_directory(os.path.join(root_dir_os_specific_macos, curr_dirs[logic_output])) + create_directory(os.path.join(root_dirs[0], curr_dirs[logic_output])) + create_directory(os.path.join(root_dirs[1], curr_dirs[logic_output])) + create_directory(os.path.join(root_dirs[2], curr_dirs[logic_output])) + create_directory(os.path.join(root_dirs[3], curr_dirs[logic_output])) # update the lower order current directories for i in range(logic_output + 1, 4): curr_dirs[i] = curr_dirs[logic_output] - return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs + return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs, link_lists def create_directory(new_directory): @@ -333,28 +311,30 @@ def check_if_statements(curr_line, active_OS_if_states): return "write_text", None, curr_line -def write_text_to_file(file_name, curr_line): +def write_text_to_file(file_name, curr_line, link_lists): """ function that writes a line to a file :param file_name: target file to write the line to :param curr_line: line to be written to the file - :return: + :param link_lists: list containing all the links that will be printed at the end of files + :return link_lists: updated link_lists """ - global links_generic, links_linux, links_windows, links_macos with open(file_name, "a") as write_file: if "generic" in file_name: - curr_line, links_generic = replace_markdown_markers(curr_line, links_generic) + curr_line, links_generic = replace_markdown_markers(curr_line, link_lists[0]) elif "linux" 
in file_name: - curr_line, links_linux = replace_markdown_markers(curr_line, links_linux) + curr_line, links_linux = replace_markdown_markers(curr_line, link_lists[1]) elif "windows" in file_name: - curr_line, links_windows = replace_markdown_markers(curr_line, links_windows) + curr_line, links_windows = replace_markdown_markers(curr_line, link_lists[2]) else: - curr_line, links_macos = replace_markdown_markers(curr_line, links_macos) + curr_line, links_macos = replace_markdown_markers(curr_line, link_lists[3]) write_file.write(curr_line) + return link_lists -def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, last_title): + +def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, last_title, root_dirs, link_lists): """ function that decides what file to write text to @@ -362,18 +342,22 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las :param active_OS_if_states: dictionary keeping track of which OSes are active according to the if-statements :param last_directory: most recently made directory :param last_title: the most recently encountered title - :return: + :param root_dirs: a list with all root directories + :param link_lists: list of links that need to be written at the end of the files + :return link_lists: an updated link_lists """ # check that the line is part of the website for gent if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and \ active_OS_if_states["macos"] == "inactive": - write_text_to_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), curr_line) + link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), curr_line, link_lists) if active_OS_if_states["linux"] == "active": - write_text_to_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), curr_line) + link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, 
last_title + ".txt"), curr_line, link_lists) if active_OS_if_states["windows"] == "active": - write_text_to_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), curr_line) + link_lists = write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".txt"), curr_line, link_lists) if active_OS_if_states["macos"] == "active": - write_text_to_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), curr_line) + link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".txt"), curr_line, link_lists) + + return link_lists def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_, main_title, last_title): @@ -434,7 +418,6 @@ def main(): main function :return: """ - global root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, links_generic, links_linux, links_windows, links_macos # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason remove_directory_tree("parsed_mds") remove_directory_tree("copies") @@ -453,6 +436,27 @@ def main(): if not os.path.exists("if_mangled_files"): os.mkdir("if_mangled_files") + ################### define loop-invariant variables ################### + + # variable that keeps track of the source directories + source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"), + os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")] + + # list of all the filenames + filenames_generic = {} + filenames_linux = {} + for source_directory in source_directories: + all_items = os.listdir(source_directory) + files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + for file in files: + if "linux-tutorial" in source_directory: + filenames_linux[file] = os.path.join(source_directory, file) + else: + filenames_generic[file] = os.path.join(source_directory, file) + + # some 
files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc (temporary, should be taken out when the original files have been converted to proper markdown) + problem_files = ["getting_started.md", "navigating.md"] + for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): ################### define/reset loop specific variables ################### @@ -499,6 +503,7 @@ def main(): links_linux = [] links_windows = [] links_macos = [] + link_lists = [links_generic, links_linux, links_windows, links_macos] # dictionaries to keep track of current OS active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} @@ -533,7 +538,7 @@ def main(): with open(copy_file, 'r') as readfile: for line in readfile: - title_level, title, directory, curr_dirs = check_for_title(line, main_title, last_directory, last_title, curr_dirs, is_linux_tutorial, in_code_block) + title_level, title, directory, curr_dirs, link_lists = check_for_title(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block) if '```' in line or (('
' in line) ^ ('
' in line)): in_code_block = not in_code_block @@ -551,11 +556,11 @@ def main(): next_action = check_if_statements(line, active_OS_if_states) while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": if next_action[0] == "write_text_and_check_extra_message": - choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title) + link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists) next_action = check_if_statements(next_action[1], active_OS_if_states) if next_action[0] == "write_text": - choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title) + link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists) # write end of file for the last file write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, From 9163a759c3d06cc7eb10185b10f177baa86f2294 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 14 Aug 2024 09:45:27 +0200 Subject: [PATCH 027/152] small changes to make file more readable --- .../chatbot_parser.py | 51 ++++++++----------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index eb30cb7fb6a..cb3ed26fda8 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -48,29 +48,23 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs if logic_output == 0 or in_code_block_: return 0, None, None, curr_dirs, link_lists else: + + # if a new title is detected, write the end of the previous 
file if last_title is not None: - write_end_of_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), "", link_lists[0], is_linux_tutorial_, main_title, last_title) - write_end_of_file(os.path.join(root_dirs[1], last_directory, last_title + ".txt"), "Linux", - link_lists[1], is_linux_tutorial_, main_title, last_title) - write_end_of_file(os.path.join(root_dirs[2], last_directory, last_title + ".txt"), "Windows", - link_lists[2], is_linux_tutorial_, main_title, last_title) - write_end_of_file(os.path.join(root_dirs[3], last_directory, last_title + ".txt"), "macOS", - link_lists[3], is_linux_tutorial_, main_title, last_title) - - # reset the link lists - link_lists[0] = [] - link_lists[1] = [] - link_lists[2] = [] - link_lists[3] = [] + for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): + write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".txt"), OS, link_lists[i], is_linux_tutorial_, main_title, last_title) + + # reset the link lists for each OS + for i in range(4): + link_lists[i] = [] + # make a new directory corresponding with the new title curr_dirs[logic_output] = os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-'))) - create_directory(os.path.join(root_dirs[0], curr_dirs[logic_output])) - create_directory(os.path.join(root_dirs[1], curr_dirs[logic_output])) - create_directory(os.path.join(root_dirs[2], curr_dirs[logic_output])) - create_directory(os.path.join(root_dirs[3], curr_dirs[logic_output])) + for i in range(4): + create_directory(os.path.join(root_dirs[i], curr_dirs[logic_output])) - # update the lower order current directories + # update the higher order current directories for i in range(logic_output + 1, 4): curr_dirs[i] = curr_dirs[logic_output] @@ -152,6 +146,7 @@ def jinja_parser(filename, copy_location): def mangle_os_ifs(line, is_os): """ function that mangles the os-related if-statements. 
This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure. + We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements. :param line: the current line to check for os-related if-statements :param is_os: boolean keep track of the current os-state of the if-statements. Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} @@ -198,7 +193,7 @@ def mangle_os_ifs(line, is_os): is_os = 0 else: if is_os == 2 or is_os == 3: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[constr_match.end() + start_index + added_length - 1:] + line = line[:pos_first_mangle] + "-if-" + line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:] added_length += 8 start_index += constr_match.end() @@ -247,7 +242,7 @@ def check_if_statements(curr_line, active_OS_if_states): if match: content = match.group(1) - # new if-statement wrt OS + # new if-statement wrt OS with '==' if re.search(r'if OS == ', content): OS = content.split()[-1] @@ -259,6 +254,7 @@ def check_if_statements(curr_line, active_OS_if_states): if other_OS != OS and active_OS_if_states[other_OS] == "active": active_OS_if_states[other_OS] = "inactive" + # new if-statement wrt OS with '!=' elif re.search(r'if OS != ', content): OS = content.split()[-1] @@ -347,8 +343,7 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las :return link_lists: an updated link_lists """ # check that the line is part of the website for gent - if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and \ - active_OS_if_states["macos"] == "inactive": + if active_OS_if_states["linux"] == 
"inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive": link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), curr_line, link_lists) if active_OS_if_states["linux"] == "active": link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".txt"), curr_line, link_lists) @@ -457,6 +452,7 @@ def main(): # some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc (temporary, should be taken out when the original files have been converted to proper markdown) problem_files = ["getting_started.md", "navigating.md"] + # for loops over all files for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): ################### define/reset loop specific variables ################### @@ -540,6 +536,7 @@ def main(): for line in readfile: title_level, title, directory, curr_dirs, link_lists = check_for_title(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block) + # detect codeblocks to make sure titles aren't detected in them if '```' in line or (('
' in line) ^ ('
' in line)): in_code_block = not in_code_block @@ -563,14 +560,8 @@ def main(): link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists) # write end of file for the last file - write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, - is_linux_tutorial, main_title, last_title) - write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux", - links_linux, is_linux_tutorial, main_title, last_title) - write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), - "Windows", links_windows, is_linux_tutorial, main_title, last_title) - write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS", - links_macos, is_linux_tutorial, main_title, last_title) + for OS in ["", "Linux", "Windows", "macOS"]: + write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), OS, links_generic, is_linux_tutorial, main_title, last_title) # remove_directory_tree("copies") # remove_directory_tree("if_mangled_files") From 1dcffc1bac4ee341556ef29c3557bb21686eaf2d Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 14 Aug 2024 11:45:16 +0200 Subject: [PATCH 028/152] codeblocks, tips, warnings and info reformatted --- .../chatbot_parser.py | 39 +++++++++++++++++-- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index cb3ed26fda8..d8d8000bdcc 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -98,10 +98,37 @@ def replace_markdown_markers(curr_line, linklist): curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + 
str(len(linklist) + 1) + "]") linklist.append(match[1]) - # TODO: code-blocks - # TODO: tips - # TODO: warnings - # etc + # codeblock (with ``` -> always stands on a separate line, so line can be dropped) + if '```' in curr_line: + curr_line = "" + + # structures within <> + match = re.findall(r'<(.*?)>', curr_line) + if match: + for i, content in enumerate(match): + exception_words = ['SEQUENCE', 'vsc40000', 'Session', 'OUTPUT_DIR', 'jobname', 'jobid', 'hostname', 'Enjoy the day!', 'stdout', 'stderr', 'coursecode', 'year', 'nickname', '01', 'number of ', 'user', 'home', 'software', 'module'] + if '#include' in curr_line: + pass + elif '.' in content: + curr_line = re.sub(f'<{content}>', f"{content}", curr_line) + elif '***' in content: + curr_line = re.sub(r'<\*\*\*', "", re.sub(r'\*\*\*\\>', "", curr_line)) + elif '-' in content and ' ' not in content: + curr_line = re.sub(f'<{content}>', f"{content}", curr_line) + # sometimes normal words are between <> brackets and should be excluded (ugly fix) + elif any(substring in content for substring in exception_words): + pass + # special cases that messed up the formatting (ugly fix) + elif ' files', "", curr_line) + elif '<>' in curr_line: + pass + else: + curr_line = re.sub(r'<.*?>', "", curr_line) + + # structures with !!! (info, tips, warnings) + if '!!!' in curr_line: + curr_line = re.sub(r'!!!', "", curr_line) return curr_line, linklist @@ -327,6 +354,9 @@ def write_text_to_file(file_name, curr_line, link_lists): curr_line, links_macos = replace_markdown_markers(curr_line, link_lists[3]) write_file.write(curr_line) + # if re.search(r'<.*?>', curr_line): + # print(curr_line) + return link_lists @@ -567,6 +597,7 @@ def main(): # remove_directory_tree("if_mangled_files") +print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") main() # TODO: reconsider maximum depth to be detected as title (now at four) # TODO: adapt script to be used from command line From 4d7fbdb193e14fc8d93dd914748409aff0d2170c Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 14 Aug 2024 11:48:59 +0200 Subject: [PATCH 029/152] small optimisations --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index d8d8000bdcc..91198ea777e 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -28,7 +28,7 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs :param last_title: the most recently encountered title :param curr_dirs: the most recent directories at each title level :param root_dirs: a list containing the root directories - param link_lists: a list containing all four link_lists with the links that will be printed at the bottom of a file + :param link_lists: a list containing all four link_lists with the links that will be printed at the bottom of a file :param is_linux_tutorial_: boolean to indicate whether the current file is part of the linux tutorial :param in_code_block_: boolean to indicate whether the current line is part of a codeblock :return: the depth of the title @@ -427,7 +427,7 @@ def make_valid_title(title): invalid_chars = r'[<>:"/\\|?*\0()]' # get rid of extra information between {} brackets - s = re.sub(r'\{.*?}', '', title) + title = re.sub(r'\{.*?}', '', title) # Remove invalid characters valid_filename = re.sub(invalid_chars, '', title) @@ -513,16 +513,11 @@ def main(): main_title = filename[:-3] # variable that keeps track of the directories that are used to write 
in at different levels - curr_dirs = [filename[:-3] for i in range(5)] - - # variable to keep track whether we're dealing with OS-specific info or not - OS_specific = False + curr_dirs = [filename[:-3] for _ in range(5)] # variable that keeps track of the latest non-zero level title and corresponding directory - last_title_level = 1 last_title = None last_directory = None - last_was_title = False # list to keep track of links in the text links_generic = [] @@ -572,7 +567,6 @@ def main(): # line is a title with a maximum depth of 4 if title_level > 0: - last_title_level = title_level last_title = title last_directory = directory after_first_title = True From 671f7f3b5e57a2643e87a65a8b449e068176261d Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 14 Aug 2024 11:50:18 +0200 Subject: [PATCH 030/152] small optimisations --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 91198ea777e..70ba8b17ee5 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -354,9 +354,6 @@ def write_text_to_file(file_name, curr_line, link_lists): curr_line, links_macos = replace_markdown_markers(curr_line, link_lists[3]) write_file.write(curr_line) - # if re.search(r'<.*?>', curr_line): - # print(curr_line) - return link_lists @@ -587,8 +584,8 @@ def main(): for OS in ["", "Linux", "Windows", "macOS"]: write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), OS, links_generic, is_linux_tutorial, main_title, last_title) - # remove_directory_tree("copies") - # remove_directory_tree("if_mangled_files") + remove_directory_tree("copies") + remove_directory_tree("if_mangled_files") print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") From e5c39bd2dd5f7e708b802f193656c20dfaa41253 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 14 Aug 2024 12:04:39 +0200 Subject: [PATCH 031/152] initial commit --- scripts/HPC_chatbot_preprocessor/README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/README.md diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md new file mode 100644 index 00000000000..32ec81c2fa5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -0,0 +1,16 @@ +# Chatbot parser + +`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory for a chatbot to be trained on. + +## Generated file structure + +This directory structure is written as a subdirectory of `parsed_mds`. In `parsed_mds`, two subdirectories can be found: + +- `generic` contains the parts of the markdown sources that were non-OS-specific +- `os_specific` contains the parts of the markdown sources that were OS-specific + +Within `os_specific` a further distinction is made for each of the three possible operating systems included in the documentation. + +These subdirectories then contain a subdirectory for each individual markdown sourcefile. In the file specific subdirectories, further divisions are made according to the titles and subtitles found in that markdown sourcefile. + +Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and at the end a reference link to the corresponding part of the documentation website on . 
\ No newline at end of file From c6492fc14120e9391507a8363bcd9a82976766e8 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 14 Aug 2024 13:03:46 +0200 Subject: [PATCH 032/152] added requirements --- scripts/HPC_chatbot_preprocessor/requirements.txt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/requirements.txt diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt new file mode 100644 index 00000000000..19ed8a2a29d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/requirements.txt @@ -0,0 +1,6 @@ +os +re +shutil +pypandoc +yaml +jinja2 \ No newline at end of file From aff8198d90ed64b044e837fd672c0019b88520d8 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 14 Aug 2024 13:04:08 +0200 Subject: [PATCH 033/152] added requirements and usage info --- scripts/HPC_chatbot_preprocessor/README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 32ec81c2fa5..e1e12046dd5 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -13,4 +13,17 @@ Withing `os_specific` a further distinction is made for each of the three possib These subdirectories then contain a subdirectory for each individual markdown sourcefile. In the file specific subdirectories, further divisions are made according to the titles and subtitles found in that markdown sourcefile. -Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and at the end a reference link to the corresponding part of the documentation website on . 
\ No newline at end of file +Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and at the end a reference link to the corresponding part of the documentation website on . + +## Requirements + +- The required Python packages are listed in `requirements.txt` +- [Pandoc](https://pandoc.org/installing.html) must be installed and must be added to the system PATH + +## Usage + +The script can be run in a shell environment with the following command: + +```shell +python chatbot_parser.py +``` \ No newline at end of file From a981002d1cd8eab50a69d860838084b768f538e8 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 14 Aug 2024 13:04:27 +0200 Subject: [PATCH 034/152] minor changes to the print statements --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 70ba8b17ee5..6cb74a5c9be 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -590,5 +590,4 @@ def main(): print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") main() -# TODO: reconsider maximum depth to be detected as title (now at four) -# TODO: adapt script to be used from command line +print("Parsing finished successfully") From 1f3b3432fdba5390befbfd2109fa1b698c5b0728 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 16 Aug 2024 10:53:10 +0200 Subject: [PATCH 035/152] reworked function to take care of html structures --- .../chatbot_parser.py | 56 +++++++++++++------ 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 6cb74a5c9be..d5e950973ec 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -4,6 +4,7 @@ import pypandoc import yaml from jinja2 import FileSystemLoader, Environment, ChoiceLoader +from itertools import chain ################### define functions ################### @@ -91,6 +92,13 @@ def replace_markdown_markers(curr_line, linklist): :return curr_line: the adapted current line :return linklist: the updated linklist """ + + # TODO: filter out images before links + # replace images with an empty line + if re.match(r'!\[image]\(.*?\)', curr_line) or re.match(r'!\[]\(img/.*?.png\)', curr_line): + print(curr_line) + curr_line = "" + # replace links with a reference matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) if matches: @@ -106,25 +114,36 @@ def replace_markdown_markers(curr_line, linklist): match = re.findall(r'<(.*?)>', curr_line) if match: for i, content in enumerate(match): - exception_words = ['SEQUENCE', 'vsc40000', 'Session', 'OUTPUT_DIR', 'jobname', 'jobid', 'hostname', 'Enjoy the day!', 'stdout', 'stderr', 'coursecode', 'year', 'nickname', '01', 'number of ', 'user', 'home', 'software', 'module'] - if '#include' in curr_line: - pass - elif '.' 
in content: - curr_line = re.sub(f'<{content}>', f"{content}", curr_line) - elif '***' in content: - curr_line = re.sub(r'<\*\*\*', "", re.sub(r'\*\*\*\\>', "", curr_line)) - elif '-' in content and ' ' not in content: - curr_line = re.sub(f'<{content}>', f"{content}", curr_line) - # sometimes normal words are between <> brackets and should be excluded (ugly fix) - elif any(substring in content for substring in exception_words): - pass - # special cases that messed up the formatting (ugly fix) + syntax_words = ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"] # make sure these are always lowercase + syntax_words_variations = list(chain.from_iterable([[element, element + "/", "/" + element] for element in syntax_words])) + syntax_words_style = [element + " style=.*" for element in syntax_words] + + # add references for every link of format + if re.search(r'a href=.*', content): + link = content[8:-1] + curr_line = re.sub(f'<{content}>', "[" + str(len(linklist) + 1) + "]", curr_line) + linklist.append(link) + + # drop the syntax words + elif content.lower() in syntax_words_variations: + curr_line = re.sub(f'<{content}>', "", curr_line) + + # drop the version of the syntax_words followed by " style=" + elif any(re.match(pattern, content) for pattern in syntax_words_style): + curr_line = re.sub(r'<.*?>', "", curr_line) + + # drop markdown comments + elif re.fullmatch(r'!--.*?--', content): + curr_line = re.sub(r'<.*?>', "", curr_line) + + # special case (ugly fix) elif ' files', "", curr_line) - elif '<>' in curr_line: - pass + + # keep the rest else: - curr_line = re.sub(r'<.*?>', "", curr_line) + # print("<" + content + ">") + pass # structures with !!! (info, tips, warnings) if '!!!' 
in curr_line: @@ -505,6 +524,7 @@ def main(): root_dir_os_specific_linux = os.path.join("parsed_mds", "os_specific", "linux") root_dir_os_specific_windows = os.path.join("parsed_mds", "os_specific", "windows") root_dir_os_specific_macos = os.path.join("parsed_mds", "os_specific", "macos") + root_dirs = [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos] # variable for the main title (needed for reference links) main_title = filename[:-3] @@ -581,8 +601,8 @@ def main(): link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists) # write end of file for the last file - for OS in ["", "Linux", "Windows", "macOS"]: - write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), OS, links_generic, is_linux_tutorial, main_title, last_title) + for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): + write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".txt"), OS, link_lists[i], is_linux_tutorial, main_title, last_title) remove_directory_tree("copies") remove_directory_tree("if_mangled_files") From 48cad9779f0ed2a492027330b5af531cf0631079 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 16 Aug 2024 11:26:43 +0200 Subject: [PATCH 036/152] filter out images --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index d5e950973ec..2408557fd49 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -93,10 +93,8 @@ def replace_markdown_markers(curr_line, linklist): :return linklist: the updated linklist """ - # TODO: filter out images before links # replace images with an 
empty line - if re.match(r'!\[image]\(.*?\)', curr_line) or re.match(r'!\[]\(img/.*?.png\)', curr_line): - print(curr_line) + if re.search(r'(?i)!\[image]\(.*?\)', curr_line) or re.search(r'!\[]\(img/.*?.png\)', curr_line): curr_line = "" # replace links with a reference @@ -495,9 +493,6 @@ def main(): else: filenames_generic[file] = os.path.join(source_directory, file) - # some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc (temporary, should be taken out when the original files have been converted to proper markdown) - problem_files = ["getting_started.md", "navigating.md"] - # for loops over all files for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): @@ -568,10 +563,6 @@ def main(): # process the jinja macros jinja_parser(filename, copy_file) - # convert the files without proper markdown layout into markdown using pandoc - if "linux-tutorial" in filenames[filename] and filename in problem_files: - pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file) - # open the file and store line by line in the right file with open(copy_file, 'r') as readfile: From df58f233e125078552318647815054bdfdff0bcb Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 16 Aug 2024 13:28:28 +0200 Subject: [PATCH 037/152] get rid of backquotes, asterisks, pluses and underscores used for formatting --- .../chatbot_parser.py | 53 ++++++++++++++----- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 2408557fd49..72d8c251c55 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -1,7 +1,6 @@ import os import re import shutil -import pypandoc import yaml from jinja2 import FileSystemLoader, Environment, ChoiceLoader from itertools import chain @@ -83,12 +82,13 @@ def 
create_directory(new_directory): os.mkdir(new_directory) -def replace_markdown_markers(curr_line, linklist): +def replace_markdown_markers(curr_line, linklist, in_code_block): """ function that replaces certain markdown structures with the equivalent used on the website :param curr_line: the current line on which markdown structures need to be replaced :param linklist: the list used to store links that need to be printed at the end of the file + :param in_code_block: boolean indicating whether the current line is part of a code block :return curr_line: the adapted current line :return linklist: the updated linklist """ @@ -147,6 +147,29 @@ def replace_markdown_markers(curr_line, linklist): if '!!!' in curr_line: curr_line = re.sub(r'!!!', "", curr_line) + # get rid of other markdown indicators (`, *, +, _) + if not in_code_block: + + backquotes = re.findall(r'`(.*?)`', curr_line) + if backquotes: + for i, content in enumerate(backquotes): + curr_line = curr_line.replace(f"`{content}`", content) + + asterisks = re.findall(r'(? 
Date: Fri, 16 Aug 2024 14:22:01 +0200 Subject: [PATCH 038/152] dump to json files instead of txt files --- .../chatbot_parser.py | 78 ++++++++++++------- 1 file changed, 50 insertions(+), 28 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 72d8c251c55..b6061ef9d90 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -1,9 +1,10 @@ +import json import os import re import shutil import yaml -from jinja2 import FileSystemLoader, Environment, ChoiceLoader from itertools import chain +from jinja2 import FileSystemLoader, Environment, ChoiceLoader ################### define functions ################### @@ -52,7 +53,7 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs # if a new title is detected, write the end of the previous file if last_title is not None: for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): - write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".txt"), OS, link_lists[i], is_linux_tutorial_, main_title, last_title) + write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial_, main_title, last_title) # reset the link lists for each OS for i in range(4): @@ -384,16 +385,30 @@ def write_text_to_file(file_name, curr_line, link_lists, in_code_block): :param in_code_block: boolean indicating whether the current line is in a codeblock :return link_lists: updated link_lists """ - with open(file_name, "a") as write_file: + + if os.path.exists(file_name) or curr_line.strip(): + if os.path.exists(file_name): + with open(file_name, "r") as read_file: + data = json.load(read_file) + else: + data = {} + if "generic" in file_name: - curr_line, links_generic = replace_markdown_markers(curr_line, link_lists[0], in_code_block) + curr_line, link_lists[0] = replace_markdown_markers(curr_line, 
link_lists[0], in_code_block) elif "linux" in file_name: - curr_line, links_linux = replace_markdown_markers(curr_line, link_lists[1], in_code_block) + curr_line, link_lists[1] = replace_markdown_markers(curr_line, link_lists[1], in_code_block) elif "windows" in file_name: - curr_line, links_windows = replace_markdown_markers(curr_line, link_lists[2], in_code_block) + curr_line, link_lists[2] = replace_markdown_markers(curr_line, link_lists[2], in_code_block) + else: + curr_line, link_lists[3] = replace_markdown_markers(curr_line, link_lists[3], in_code_block) + + if 'content' in data: + data['content'] += curr_line else: - curr_line, links_macos = replace_markdown_markers(curr_line, link_lists[3], in_code_block) - write_file.write(curr_line) + data['content'] = curr_line + + with open(file_name, "w") as write_file: + json.dump(data, write_file, indent=4) return link_lists @@ -413,13 +428,13 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las """ # check that the line is part of the website for gent if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive": - link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), curr_line, link_lists, in_code_block) + link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) if active_OS_if_states["linux"] == "active": - link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".txt"), curr_line, link_lists, in_code_block) + link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) if active_OS_if_states["windows"] == "active": - link_lists = write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".txt"), curr_line, link_lists, in_code_block) + link_lists = 
write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) if active_OS_if_states["macos"] == "active": - link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".txt"), curr_line, link_lists, in_code_block) + link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) return link_lists @@ -436,23 +451,30 @@ def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_, main_titl :param last_title: the most recently encountered title :return: """ - if len(OS) > 0: - OS = OS + "/" - # add the links from within the document - with open(file_location, 'a') as write_file: - write_file.write("\n\n") + if os.path.exists(file_location): + + if len(OS) > 0: + OS = OS + "/" + + with open(file_location, "r") as read_file: + data = json.load(read_file) + + # add the links from within the document + data['links'] = {} for i, link in enumerate(linklist): - write_file.write("[" + str(i + 1) + "]: " + str(link) + "\n") + data['links'][str(i + 1)] = str(link) - if is_linux_tutorial_: - linux_part = "linux-tutorial/" - else: - linux_part = "" + if is_linux_tutorial_: + linux_part = "linux-tutorial/" + else: + linux_part = "" + + # add the reference link + data['reference_link'] = ("docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-')) - # finally add the reference link - with open(file_location, 'a') as write_file: - write_file.write("\nreference: docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-') + "\n") + with open(file_location, 'w') as write_file: + json.dump(data, write_file, indent=4) def make_valid_title(title): @@ -618,10 +640,10 @@ def main(): # write end of file for the last file for i, OS in 
enumerate(["", "Linux", "Windows", "macOS"]): - write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".txt"), OS, link_lists[i], is_linux_tutorial, main_title, last_title) + write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title) - remove_directory_tree("copies") - remove_directory_tree("if_mangled_files") + # remove_directory_tree("copies") + # remove_directory_tree("if_mangled_files") print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") From 2c333fea2e36229a6db8fd7d85ce906ae0479c8c Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 16 Aug 2024 15:44:07 +0200 Subject: [PATCH 039/152] cleaned up parser with macros --- .../chatbot_parser.py | 284 ++++++++++-------- 1 file changed, 158 insertions(+), 126 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index b6061ef9d90..b36f5c3c471 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -6,19 +6,55 @@ from itertools import chain from jinja2 import FileSystemLoader, Environment, ChoiceLoader - -################### define functions ################### -def remove_directory_tree(old_directory): - """ - function that removes a full directory tree - - :param old_directory: the directory to be removed - :return: - """ - if os.path.exists(old_directory): - shutil.rmtree(old_directory) +#################### define macro's #################### +# directories +PARSED_MDS = "parsed_mds" +COPIES = "copies" +IF_MANGLED_FILES = "if_mangled_files" +LINUX_TUTORIAL = "linux-tutorial" +RETURN_DIR = ".." 
+MKDOCS_DIR = "mkdocs" +DOCS_DIR = "docs" +HPC_DIR = "HPC" +EXTRA_DIR = "extra" +GENERIC_DIR = "generic" +OS_SPECIFIC_DIR = "os_specific" + +# OSes +LINUX = "linux" +WINDOWS = "windows" +MACOS = "macos" + +# urls +REPO_URL = 'https://github.com/hpcugent/vsc_user_docs' +DOCS_URL = "docs.hpc.ugent.be" + +# OS-related if-states +ACTIVE = "active" +INACTIVE = "inactive" + +# if mangler states +NON_OS_IF = 0 +NON_OS_IF_IN_OS_IF = 1 +OS_IF = 2 +OS_IF_IN_OS_IF = 3 + +# if mangler macros +IF_MANGLED_PART = "-if-" + +# actions +DONE = "done" +WRITE_TEXT = "write_text" +CHECK_EXTRA_MESSAGE = "check_extra_message" +WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE = "write_text_and_check_extra_message" + +# JSON attributes +CONTENT = "content" +LINKS = "links" +REFERENCE_LINK = "reference_link" +################### define functions ################### def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs, root_dirs, link_lists, is_linux_tutorial_, in_code_block_): """ function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables @@ -63,7 +99,7 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs curr_dirs[logic_output] = os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-'))) for i in range(4): - create_directory(os.path.join(root_dirs[i], curr_dirs[logic_output])) + os.makedirs(os.path.join(root_dirs[i], curr_dirs[logic_output]), exist_ok=True) # update the higher order current directories for i in range(logic_output + 1, 4): @@ -72,17 +108,6 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs, link_lists -def create_directory(new_directory): - """ - function that creates new directories - - :param new_directory: directory to be created - :return: - """ - if not 
os.path.exists(new_directory): - os.mkdir(new_directory) - - def replace_markdown_markers(curr_line, linklist, in_code_block): """ function that replaces certain markdown structures with the equivalent used on the website @@ -183,16 +208,16 @@ def jinja_parser(filename, copy_location): :return: """ # YAML file location - yml_file_path = os.path.join('..', '..', 'mkdocs', 'extra', 'gent.yml') + yml_file_path = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, EXTRA_DIR, 'gent.yml') # Read the YAML file with open(yml_file_path, 'r') as yml_file: words_dict = yaml.safe_load(yml_file) - # ugly fix for index.md error + # ugly fix for index.md error that occurs because of the macro "config.repo_url" in mkdocs/docs/HPC/index.md additional_context = { 'config': { - 'repo_url': 'https://github.com/hpcugent/vsc_user_docs' + 'repo_url': REPO_URL } } combined_context = {**words_dict, **additional_context} @@ -201,7 +226,7 @@ def jinja_parser(filename, copy_location): mangle_ifs(copy_location, filename) # Use Jinja2 to replace the macros - template_loader = ChoiceLoader([FileSystemLoader(searchpath='if_mangled_files'), FileSystemLoader(searchpath=os.path.join("..", "..", "mkdocs", "docs", "HPC"))]) + template_loader = ChoiceLoader([FileSystemLoader(searchpath=IF_MANGLED_FILES), FileSystemLoader(searchpath=os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR))]) templateEnv = Environment(loader=template_loader) template = templateEnv.get_template(filename) rendered_content = template.render(combined_context) @@ -217,7 +242,11 @@ def mangle_os_ifs(line, is_os): We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements. :param line: the current line to check for os-related if-statements - :param is_os: boolean keep track of the current os-state of the if-statements. 
Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} + :param is_os: variable keep track of the current os-state of the if-statements. Can be NON_OS_IF, NON_OS_IF_IN_OS_IF, OS_IF or OS_IF_IN_OS_IF + NON_OS_IF: not in an os-if + NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if + OS_IF: in an os-if + OS_IF_IN_OS_IF: in an os-if nested in an os-if} :return line: the modified line with mangled os-related if-statements """ @@ -232,37 +261,46 @@ def mangle_os_ifs(line, is_os): if_match = re.search(r'if ', match.group(1)) if_os_match = re.search(r'if OS ', match.group(1)) endif_match = re.search(r'endif', match.group(1)) + + # mangle positions pos_first_mangle = constr_match.start() + start_index + added_length + 1 pos_second_mangle = constr_match.end() + start_index + added_length - 1 + # different parts of the original string + PART_BEFORE_MANGLING = line[:pos_first_mangle] + PART_BETWEEN_MANGLING = line[pos_first_mangle:pos_second_mangle] + PART_AFTER_MANGLING = line[pos_second_mangle:] + # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these if endif_match: - if is_os == 2 or is_os == 3: - line = line[:pos_first_mangle] + "-if-" + line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:] - added_length += 8 - if is_os == 2: - is_os = 0 - elif is_os == 3: - is_os = 2 - elif is_os == 1: - is_os = 2 + if is_os == OS_IF or is_os == OS_IF_IN_OS_IF: + line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING + added_length += 2 * len(IF_MANGLED_PART) + if is_os == OS_IF: + is_os = NON_OS_IF + elif is_os == OS_IF_IN_OS_IF: + is_os = OS_IF + elif is_os == NON_OS_IF_IN_OS_IF: + is_os = OS_IF + elif if_match: if if_os_match: - line = line[:pos_first_mangle] + "-if-" + 
line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:] - added_length += 8 - if is_os == 2: - is_os = 3 + line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING + added_length += 2 * len(IF_MANGLED_PART) + if is_os == OS_IF: + is_os = OS_IF_IN_OS_IF else: - is_os = 2 + is_os = OS_IF else: - if is_os == 2: - is_os = 1 + if is_os == OS_IF: + is_os = NON_OS_IF_IN_OS_IF else: - is_os = 0 + is_os = NON_OS_IF + else: - if is_os == 2 or is_os == 3: - line = line[:pos_first_mangle] + "-if-" + line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:] - added_length += 8 + if is_os == OS_IF or is_os == OS_IF_IN_OS_IF: + line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING + added_length += 2 * len(IF_MANGLED_PART) start_index += constr_match.end() match = re.search(r'\{%(.*?)%}(.*)', match.group(2)) @@ -278,9 +316,9 @@ def mangle_ifs(directory, filename): :return: """ # variable to keep track of latest if-statement scope - is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} + is_os = NON_OS_IF - with open(os.path.join("if_mangled_files", filename), 'w') as write_file: + with open(os.path.join(IF_MANGLED_FILES, filename), 'w') as write_file: with open(directory, 'r') as read_file: for line in read_file: new_line, is_os = mangle_os_ifs(line, is_os) @@ -294,18 +332,18 @@ def check_if_statements(curr_line, active_OS_if_states): :param curr_line: the line to be checked for if-statements to build the directory structure :param active_OS_if_states: dictionary keeping track of the active OS states according to the if-statements :return: the next action to be done with the line: - "done": An if-statement has been found at the start of the line, the active os list has been updated, processing of the current line is finished and a following line 
can be processed. - "check_extra_message": An if-statement has been found at the start of the line, the active os list has been updated, more text has been detected after the if-statement that also needs to be checked. - "write_text": No if-statement has been found, write the current line to a file (can also be part of the current line) - "write_text_and_check_extra_message": An if statement has been found not at the start of the line. Firstly, write the text up until the if-statement to a file, then check the rest of the line. + DONE: An if-statement has been found at the start of the line, the active os list has been updated, processing of the current line is finished and a following line can be processed. + CHECK_EXTRA_MESSAGE: An if-statement has been found at the start of the line, the active os list has been updated, more text has been detected after the if-statement that also needs to be checked. + WRITE_TEXT: No if-statement has been found, write the current line to a file (can also be part of the current line) + WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE: An if statement has been found not at the start of the line. Firstly, write the text up until the if-statement to a file, then check the rest of the line. 
:return: the extra message to be checked, if any :return: the text to be written to the file, if any """ # check whether the first part of the line contains information wrt if-statements - match = re.search(r'^\{-if-%(.*?)%-if-}(.*)', curr_line) + match = re.search(r'^\{' + IF_MANGLED_PART + '%(.*?)%' + IF_MANGLED_PART + '}(.*)', curr_line) # check whether the line contains information wrt if-statements that is not in its first part - match_large = re.search(r'^(.*)(\{-if-%.*?%-if-})(.*)', curr_line) + match_large = re.search(r'^(.*)(\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '})(.*)', curr_line) if match: content = match.group(1) @@ -315,33 +353,33 @@ def check_if_statements(curr_line, active_OS_if_states): OS = content.split()[-1] # set new active OS - active_OS_if_states[OS] = "active" + active_OS_if_states[OS] = ACTIVE # set other active ones on inactive for other_OS in active_OS_if_states.keys(): - if other_OS != OS and active_OS_if_states[other_OS] == "active": - active_OS_if_states[other_OS] = "inactive" + if other_OS != OS and active_OS_if_states[other_OS] == ACTIVE: + active_OS_if_states[other_OS] = INACTIVE # new if-statement wrt OS with '!=' elif re.search(r'if OS != ', content): OS = content.split()[-1] # set new active OS - active_OS_if_states[OS] = "inactive" + active_OS_if_states[OS] = INACTIVE # set other inactive ones on active for other_OS in active_OS_if_states.keys(): - if other_OS != OS and active_OS_if_states[other_OS] == "inactive": - active_OS_if_states[other_OS] = "active" + if other_OS != OS and active_OS_if_states[other_OS] == INACTIVE: + active_OS_if_states[other_OS] = ACTIVE # endif statement wrt OS elif re.search(r'endif', content): if str(1) in active_OS_if_states.values(): active_OS_if_states[ - list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active" + list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = ACTIVE else: for key in 
active_OS_if_states.keys(): - active_OS_if_states[key] = "inactive" + active_OS_if_states[key] = INACTIVE # else statement wrt OS elif re.search(r'else', content): @@ -353,26 +391,26 @@ def check_if_statements(curr_line, active_OS_if_states): # set the previously active one on inactive until the next endif key_list = list(active_OS_if_states.keys()) - position = list(active_OS_if_states.values()).index("active") + position = list(active_OS_if_states.values()).index(ACTIVE) active_OS_if_states[key_list[position]] = str(i) # set inactive ones on active - while "inactive" in active_OS_if_states.values(): - position = list(active_OS_if_states.values()).index("inactive") - active_OS_if_states[key_list[position]] = "active" + while INACTIVE in active_OS_if_states.values(): + position = list(active_OS_if_states.values()).index(INACTIVE) + active_OS_if_states[key_list[position]] = ACTIVE if len(match.group(2)) != 0: extra_message = match.group(2).lstrip() - return "check_extra_message", extra_message, None + return CHECK_EXTRA_MESSAGE, extra_message, None else: - return "done", None, None + return DONE, None, None elif match_large: - return "write_text_and_check_extra_message", match_large.group(2), match_large.group(1) + return WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE, match_large.group(2), match_large.group(1) else: - return "write_text", None, curr_line + return WRITE_TEXT, None, curr_line def write_text_to_file(file_name, curr_line, link_lists, in_code_block): @@ -393,19 +431,19 @@ def write_text_to_file(file_name, curr_line, link_lists, in_code_block): else: data = {} - if "generic" in file_name: + if GENERIC_DIR in file_name: curr_line, link_lists[0] = replace_markdown_markers(curr_line, link_lists[0], in_code_block) - elif "linux" in file_name: + elif LINUX in file_name: curr_line, link_lists[1] = replace_markdown_markers(curr_line, link_lists[1], in_code_block) - elif "windows" in file_name: + elif WINDOWS in file_name: curr_line, link_lists[2] = 
replace_markdown_markers(curr_line, link_lists[2], in_code_block) else: curr_line, link_lists[3] = replace_markdown_markers(curr_line, link_lists[3], in_code_block) - if 'content' in data: - data['content'] += curr_line + if CONTENT in data: + data[CONTENT] += curr_line else: - data['content'] = curr_line + data[CONTENT] = curr_line with open(file_name, "w") as write_file: json.dump(data, write_file, indent=4) @@ -427,13 +465,13 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las :return link_lists: an updated link_lists """ # check that the line is part of the website for gent - if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive": + if active_OS_if_states[LINUX] == INACTIVE and active_OS_if_states[WINDOWS] == INACTIVE and active_OS_if_states[MACOS] == INACTIVE: link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) - if active_OS_if_states["linux"] == "active": + if active_OS_if_states[LINUX] == ACTIVE: link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) - if active_OS_if_states["windows"] == "active": + if active_OS_if_states[WINDOWS] == ACTIVE: link_lists = write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) - if active_OS_if_states["macos"] == "active": + if active_OS_if_states[MACOS] == ACTIVE: link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) return link_lists @@ -461,17 +499,17 @@ def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_, main_titl data = json.load(read_file) # add the links from within the document - data['links'] = {} + data[LINKS] = {} for i, link in enumerate(linklist): - 
data['links'][str(i + 1)] = str(link) + data[LINKS][str(i + 1)] = str(link) if is_linux_tutorial_: - linux_part = "linux-tutorial/" + linux_part = LINUX_TUTORIAL + "/" else: linux_part = "" # add the reference link - data['reference_link'] = ("docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-')) + data[REFERENCE_LINK] = (DOCS_URL + "/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-')) with open(file_location, 'w') as write_file: json.dump(data, write_file, indent=4) @@ -505,28 +543,28 @@ def main(): :return: """ # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason - remove_directory_tree("parsed_mds") - remove_directory_tree("copies") - remove_directory_tree("if_mangled_files") + shutil.rmtree(PARSED_MDS) + shutil.rmtree(COPIES) + shutil.rmtree(IF_MANGLED_FILES) # make the necessary directories - if not os.path.exists("copies"): - os.mkdir("copies") + if not os.path.exists(COPIES): + os.mkdir(COPIES) - if not os.path.exists(os.path.join("copies", "linux")): - os.mkdir(os.path.join("copies", "linux")) + if not os.path.exists(os.path.join(COPIES, LINUX_TUTORIAL)): + os.mkdir(os.path.join(COPIES, LINUX_TUTORIAL)) - if not os.path.exists("parsed_mds"): - os.mkdir("parsed_mds") + if not os.path.exists(PARSED_MDS): + os.mkdir(PARSED_MDS) - if not os.path.exists("if_mangled_files"): - os.mkdir("if_mangled_files") + if not os.path.exists(IF_MANGLED_FILES): + os.mkdir(IF_MANGLED_FILES) ################### define loop-invariant variables ################### # variable that keeps track of the source directories - source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"), - os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")] + source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), 
+ os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] # list of all the filenames filenames_generic = {} @@ -535,7 +573,7 @@ def main(): all_items = os.listdir(source_directory) files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] for file in files: - if "linux-tutorial" in source_directory: + if LINUX_TUTORIAL in source_directory: filenames_linux[file] = os.path.join(source_directory, file) else: filenames_generic[file] = os.path.join(source_directory, file) @@ -546,26 +584,26 @@ def main(): ################### define/reset loop specific variables ################### # variable that keeps track of whether file is part of the linux tutorial - is_linux_tutorial = bool("linux-tutorial" in filenames[filename]) + is_linux_tutorial = bool(LINUX_TUTORIAL in filenames[filename]) # make a copy of the original file in order to make sure the original does not get altered if is_linux_tutorial: - copy_file = os.path.join("copies", "linux", filename) + copy_file = os.path.join(COPIES, LINUX_TUTORIAL, filename) else: - copy_file = os.path.join("copies", filename) + copy_file = os.path.join(COPIES, filename) shutil.copyfile(filenames[filename], copy_file) # variable that keeps track of the directories that are used to write in at different levels if is_linux_tutorial: - root_dir_generic = os.path.join("parsed_mds", "generic", "linux_tutorial") - root_dir_os_specific_linux = os.path.join("parsed_mds", "os_specific", "linux", "linux_tutorial") - root_dir_os_specific_windows = os.path.join("parsed_mds", "os_specific", "windows", "linux_tutorial") - root_dir_os_specific_macos = os.path.join("parsed_mds", "os_specific", "macos", "linux_tutorial") + root_dir_generic = os.path.join(PARSED_MDS, GENERIC_DIR, LINUX_TUTORIAL) + root_dir_os_specific_linux = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, LINUX, LINUX_TUTORIAL) + root_dir_os_specific_windows = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS, 
LINUX_TUTORIAL) + root_dir_os_specific_macos = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, MACOS, LINUX_TUTORIAL) else: - root_dir_generic = os.path.join("parsed_mds", "generic") - root_dir_os_specific_linux = os.path.join("parsed_mds", "os_specific", "linux") - root_dir_os_specific_windows = os.path.join("parsed_mds", "os_specific", "windows") - root_dir_os_specific_macos = os.path.join("parsed_mds", "os_specific", "macos") + root_dir_generic = os.path.join(PARSED_MDS, GENERIC_DIR) + root_dir_os_specific_linux = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, LINUX) + root_dir_os_specific_windows = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS) + root_dir_os_specific_macos = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, MACOS) root_dirs = [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos] # variable for the main title (needed for reference links) @@ -586,7 +624,7 @@ def main(): link_lists = [links_generic, links_linux, links_windows, links_macos] # dictionaries to keep track of current OS - active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} + active_OS_if_states = {LINUX: INACTIVE, WINDOWS: INACTIVE, MACOS: INACTIVE} # variable that shows whether the first title has been reached yet after_first_title = False @@ -597,15 +635,8 @@ def main(): ################### actually parse the md file ################### # create directories for the source markdown file - create_directory(root_dir_generic) - create_directory(os.path.join("parsed_mds", "os_specific")) - create_directory(root_dir_os_specific_linux) - create_directory(root_dir_os_specific_windows) - create_directory(root_dir_os_specific_macos) - create_directory(os.path.join(root_dir_generic, curr_dirs[0])) - create_directory(os.path.join(root_dir_os_specific_linux, curr_dirs[0])) - create_directory(os.path.join(root_dir_os_specific_windows, curr_dirs[0])) - create_directory(os.path.join(root_dir_os_specific_macos, curr_dirs[0])) + 
for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: + os.makedirs(directory, exist_ok=True) # process the jinja macros jinja_parser(filename, copy_file) @@ -630,22 +661,23 @@ def main(): elif after_first_title: # check for if-statements and write the appropriate lines in the right files next_action = check_if_statements(line, active_OS_if_states) - while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": - if next_action[0] == "write_text_and_check_extra_message": + while next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE or next_action[0] == CHECK_EXTRA_MESSAGE: + if next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE: link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block) next_action = check_if_statements(next_action[1], active_OS_if_states) - if next_action[0] == "write_text": + if next_action[0] == WRITE_TEXT: link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block) # write end of file for the last file for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title) - # remove_directory_tree("copies") - # remove_directory_tree("if_mangled_files") + # remove_directory_tree(COPIES) + # 
remove_directory_tree(IF_MANGLED_FILES) +################### run the script ################### print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") main() print("Parsing finished successfully") From ce5235250b0e99e4a60fd03ab150b838c5e4d82e Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 16 Aug 2024 15:47:55 +0200 Subject: [PATCH 040/152] cleaned up parser with macros --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index b36f5c3c471..b6833632267 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -273,7 +273,7 @@ def mangle_os_ifs(line, is_os): # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these if endif_match: - if is_os == OS_IF or is_os == OS_IF_IN_OS_IF: + if is_os in (OS_IF, OS_IF_IN_OS_IF): line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING added_length += 2 * len(IF_MANGLED_PART) if is_os == OS_IF: @@ -298,7 +298,7 @@ def mangle_os_ifs(line, is_os): is_os = NON_OS_IF else: - if is_os == OS_IF or is_os == OS_IF_IN_OS_IF: + if is_os in (OS_IF, OS_IF_IN_OS_IF): line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING added_length += 2 * len(IF_MANGLED_PART) From 5db34afdb1a8f8dc4439daaefbd97c9204caa3d9 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 16 Aug 2024 15:52:29 +0200 Subject: [PATCH 041/152] cleaned up parser with macros --- 
scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index b6833632267..a2abc77b798 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -246,7 +246,7 @@ def mangle_os_ifs(line, is_os): NON_OS_IF: not in an os-if NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if OS_IF: in an os-if - OS_IF_IN_OS_IF: in an os-if nested in an os-if} + OS_IF_IN_OS_IF: in an os-if nested in an os-if :return line: the modified line with mangled os-related if-statements """ From 4226d28ddd50e91acb6988317d42de0f881eaea4 Mon Sep 17 00:00:00 2001 From: EwDa291 <100782488+EwDa291@users.noreply.github.com> Date: Mon, 19 Aug 2024 11:17:11 +0200 Subject: [PATCH 042/152] Update README.md --- scripts/HPC_chatbot_preprocessor/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index e1e12046dd5..5e895d4d62c 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -4,12 +4,12 @@ ## Generated file structure -This directory structure is written as a subdirectory of `parsed_mds`. In `parsed_mds`, two subdirectories can be found: +The generated directory structure is written as a subdirectory of `parsed_mds`. In `parsed_mds`, two subdirectories can be found: - `generic` contains the parts of the markdown sources that were non-OS-specific - `os_specific` contains the parts of the markdown sources that were OS-specific -Withing `os_specific` a further distinction is made for each of the three possible operating systems included in the documentation. +Within `os_specific` a further distinction is made for each of the three possible operating systems included in the documentation. 
These subdirectories then contain a subdirectory for each individual markdown sourcefile. In the file specific subdirectories, further divisions are made according to the titles and subtitles found in that markdown sourcefile. @@ -26,4 +26,4 @@ The script can be ran in a shell environment with the following command: ```shell python chatbot_parser.py -``` \ No newline at end of file +``` From d730a262f667fc00ce637d4ee7e607f201072c2f Mon Sep 17 00:00:00 2001 From: EwDa291 <100782488+EwDa291@users.noreply.github.com> Date: Mon, 19 Aug 2024 12:37:51 +0200 Subject: [PATCH 043/152] Update README.md --- scripts/HPC_chatbot_preprocessor/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 5e895d4d62c..6e7d0edc71a 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -1,6 +1,6 @@ # Chatbot parser -`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory for a chatbot to be trained on. +`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory as input for a chatbot. 
## Generated file structure From f3182e35b769550f9483a4e690b300c8775e494b Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 19 Aug 2024 13:44:46 +0200 Subject: [PATCH 044/152] added section about restrictions on input files --- scripts/HPC_chatbot_preprocessor/README.md | 92 ++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 6e7d0edc71a..23c1d87cc44 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -27,3 +27,95 @@ The script can be ran in a shell environment with the following command: ```shell python chatbot_parser.py ``` + +## Restrictions on source-files + +Due to the nature of the script, some restrictions should be taken into account about the markdown files it can use as input. + + +### Nested if structures + +The script uses the if-structures in the source-files to split the documentation into general documentation and os-specific documentation. As such it needs to keep track of which types of if-structures (os-related/non-os-related) it is reading from. When using certain nested if-structures, this will cause problems. The supported nested if-structures are determined by the macros `NON_OS_IF`, `NON_OS_IF_IN_OS_IF`, `OS_IF` and `OS_IF_IN_OS_IF`. So respectively a non-os-related if-structure, a non-os-related if nested in an os-related one, an os-related if-structure and an os-related if-structure nested in another os-related if-structure. All of these are allowed to be nested in an undetermined amount of non-os-related if-structures, but no non-os-related if structures should be nested in them. It is also not allowed to nest any of the allowed structures in more os-related if-structures. 
+ +#### Examples of valid and invalid if-structures + +##### Allowed + +###### non-os-related in os-related + +This is an example of one of the basic allowed if-structures + +``` +if OS == windows: + if site == Gent: + ... + endif +endif +``` + +###### os-related in os-related in non-os-related + +This is an example of a basic allowed if-structure nested in a non-os-specific if. + +``` +if site == Gent: + if OS == windows: + ... + else: + if OS == Linux: + ... + endif + endif +endif +``` + +##### Not allowed + +###### non-os-related in os-related in os-related + +This is an example of a non-os-related if-structure nested in one of the basic allowed if-structures. + +``` +if OS != windows: + if OS == Linux: + if site == Gent: + ... + endif + endif +endif +``` + +This will result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it. + +###### os-related in non-os-related in os-related + +This is an example of one of the basic allowed if-structures nested in an os-specific if-structure. + +``` +if OS != windows: + if site == Gent: + if OS == Linux: + ... + endif + endif +endif +``` + +This will also result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it. + +### Allowed html syntax + +The script contains a list of html syntax keywords it filters out. If more html syntax keywords are used in the future, it suffices to add them to this list to adapt the script to filter them out. The current list is: +``` +["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"] +``` +The script is also adapted to take into consideration structures like and retain the link. + +### Markdown comments + +Any comments within the markdown files (for example TODO's) should follow the following syntax: + +``` + +``` + and should be limited to one line. 
From 675bec5c75d7b0cda95d61867d6a587e7ba13a19 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 19 Aug 2024 13:53:32 +0200 Subject: [PATCH 045/152] adapted section about restrictions on input files --- scripts/HPC_chatbot_preprocessor/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 23c1d87cc44..6899aacf2b0 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -43,7 +43,7 @@ The script uses the if-structures in the source-files to split the documentation ###### non-os-related in os-related -This is an example of one of the basic allowed if-structures +This is an example of one of the basic allowed if-structures (`NON_OS_IF_IN_OS_IF`) ``` if OS == windows: @@ -55,7 +55,7 @@ endif ###### os-related in os-related in non-os-related -This is an example of a basic allowed if-structure nested in a non-os-specific if. +This is an example of the basic allowed if-structure `OS_IF_IN_OS_IF` nested in a non-os-specific if. ``` if site == Gent: From f1e58ef776a24eb2bb39bed1de1eb0611a0f60eb Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 19 Aug 2024 13:54:12 +0200 Subject: [PATCH 046/152] adapted section about restrictions on input files --- scripts/HPC_chatbot_preprocessor/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 6899aacf2b0..c18a4ebea64 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -73,7 +73,7 @@ endif ###### non-os-related in os-related in os-related -This is an example of a non-os-related if-structure nested in one of the basic allowed if-structures. +This is an example of a non-os-related if-structure nested in one of the basic allowed if-structures (`OS_IF_IN_OS_IF`). 
``` if OS != windows: @@ -89,7 +89,7 @@ This will result in the parser "forgetting" it opened an os-specific if-statemen ###### os-related in non-os-related in os-related -This is an example of one of the basic allowed if-structures nested in an os-specific if-structure. +This is an example of the basic allowed if-structure `OS_IF` (indirectly) nested in an os-specific if-structure. ``` if OS != windows: @@ -109,7 +109,7 @@ The script contains a list of html syntax keywords it filters out. If more html ``` ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"] ``` -The script is also adapted to take into consideration structures like and retain the link. +The script is also adapted to take into consideration structures like `` and retain the link. ### Markdown comments From a16850925bdcfad0f3017578082b727244d8b63b Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 19 Aug 2024 14:37:48 +0200 Subject: [PATCH 047/152] change variables to be lowercase --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index a2abc77b798..600c2c08c1e 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -267,14 +267,14 @@ def mangle_os_ifs(line, is_os): pos_second_mangle = constr_match.end() + start_index + added_length - 1 # different parts of the original string - PART_BEFORE_MANGLING = line[:pos_first_mangle] - PART_BETWEEN_MANGLING = line[pos_first_mangle:pos_second_mangle] - PART_AFTER_MANGLING = line[pos_second_mangle:] + part_before_mangling = line[:pos_first_mangle] + part_between_mangling = line[pos_first_mangle:pos_second_mangle] + part_after_mangling = line[pos_second_mangle:] # this logic isn't flawless, there are number of nested if-constructions that are technically possible that 
would break this logic, but these don't appear in the documentation as it doesn't make sense to have these if endif_match: if is_os in (OS_IF, OS_IF_IN_OS_IF): - line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING + line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling added_length += 2 * len(IF_MANGLED_PART) if is_os == OS_IF: is_os = NON_OS_IF @@ -285,7 +285,7 @@ def mangle_os_ifs(line, is_os): elif if_match: if if_os_match: - line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING + line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling added_length += 2 * len(IF_MANGLED_PART) if is_os == OS_IF: is_os = OS_IF_IN_OS_IF @@ -299,7 +299,7 @@ def mangle_os_ifs(line, is_os): else: if is_os in (OS_IF, OS_IF_IN_OS_IF): - line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING + line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling added_length += 2 * len(IF_MANGLED_PART) start_index += constr_match.end() From 09b86c9fba292b76ddb0c8ecf523e5c73d87c30c Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 19 Aug 2024 14:49:08 +0200 Subject: [PATCH 048/152] take out some copy pasting --- .../chatbot_parser.py | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 600c2c08c1e..7b25c5a9d06 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -431,14 +431,10 @@ def write_text_to_file(file_name, curr_line, link_lists, in_code_block): else: data = {} - if GENERIC_DIR in file_name: - curr_line, link_lists[0] = replace_markdown_markers(curr_line, 
link_lists[0], in_code_block) - elif LINUX in file_name: - curr_line, link_lists[1] = replace_markdown_markers(curr_line, link_lists[1], in_code_block) - elif WINDOWS in file_name: - curr_line, link_lists[2] = replace_markdown_markers(curr_line, link_lists[2], in_code_block) - else: - curr_line, link_lists[3] = replace_markdown_markers(curr_line, link_lists[3], in_code_block) + os_list = [GENERIC_DIR, LINUX, WINDOWS, MACOS] + for i, os_ in enumerate(os_list): + if os_ in file_name: + curr_line, link_lists[i] = replace_markdown_markers(curr_line, link_lists[i], in_code_block) if CONTENT in data: data[CONTENT] += curr_line @@ -467,12 +463,12 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las # check that the line is part of the website for gent if active_OS_if_states[LINUX] == INACTIVE and active_OS_if_states[WINDOWS] == INACTIVE and active_OS_if_states[MACOS] == INACTIVE: link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) - if active_OS_if_states[LINUX] == ACTIVE: - link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) - if active_OS_if_states[WINDOWS] == ACTIVE: - link_lists = write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) - if active_OS_if_states[MACOS] == ACTIVE: - link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) + else: + os_list = [LINUX, WINDOWS, MACOS] + for i, os_ in enumerate(os_list): + if active_OS_if_states[os_] == ACTIVE: + link_lists = write_text_to_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), + curr_line, link_lists, in_code_block) return link_lists From f95b99e203163e5bf0514a4ae4c4af16d1dd50df Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 19 Aug 2024 15:11:04 +0200 
Subject: [PATCH 049/152] added warning about long filepaths --- scripts/HPC_chatbot_preprocessor/README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index c18a4ebea64..0b715d85650 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -32,7 +32,6 @@ python chatbot_parser.py Due to the nature of the script, some restrictions should be taken into account about the markdown files it can use as input. - ### Nested if structures The script uses the if-structures in the source-files to split the documentation into general documentation and os-specific documentation. As such it needs to keep track of which types of if-structures (os-related/non-os-related) it is reading from. When using certain nested if-structures, this will cause problems. The supported nested if-structures are determined by the macros `NON_OS_IF`, `NON_OS_IF_IN_OS_IF`, `OS_IF` and `OS_IF_IN_OS_IF`. So respectively a non-os-related if-structure, a non-os-related if nested in an os-related one, an os-related if-structure and an os-related if-structure nested in another os-related if-structure. All of these are allowed to be nested in an undetermined amount of non-os-related if-structures, but no non-os-related if structures should be nested in them. It is also not allowed to nest any of the allowed structures in more os-related if-structures. @@ -119,3 +118,7 @@ Any comments within the markdown files (for example TODO's) should follow the fo ``` and should be limited to one line. + +### Long filenames + +Due to the nature of this script, it can generate directories with very long names. Depending on the operating system, this can cause problems with filepaths being to long resulting in files not being able to open. A possible fix for this is to make sure the filepath to the script is not too long. 
From 06bb7b9ea18f7cbae70190e1e939eb5952ee09b1 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 19 Aug 2024 15:12:21 +0200 Subject: [PATCH 050/152] fixing typos --- scripts/HPC_chatbot_preprocessor/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 0b715d85650..55996e0bef5 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -119,6 +119,6 @@ Any comments within the markdown files (for example TODO's) should follow the fo ``` and should be limited to one line. -### Long filenames +### Long filepaths -Due to the nature of this script, it can generate directories with very long names. Depending on the operating system, this can cause problems with filepaths being to long resulting in files not being able to open. A possible fix for this is to make sure the filepath to the script is not too long. +Due to the nature of this script, it can generate large directories with very long names. Depending on the operating system, this can cause problems with filepaths being too long, resulting in files not being able to open. A possible fix for this is to make sure the filepath to the script is not too long.
From 2f3e5b303a8875fe315592f792addba78f4d0e82 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 19 Aug 2024 17:02:30 +0200 Subject: [PATCH 051/152] take out copy pasting --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 7b25c5a9d06..c2fe409b420 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -464,10 +464,9 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las if active_OS_if_states[LINUX] == INACTIVE and active_OS_if_states[WINDOWS] == INACTIVE and active_OS_if_states[MACOS] == INACTIVE: link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) else: - os_list = [LINUX, WINDOWS, MACOS] - for i, os_ in enumerate(os_list): + for i, os_ in enumerate([LINUX, WINDOWS, MACOS]): if active_OS_if_states[os_] == ACTIVE: - link_lists = write_text_to_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), + link_lists = write_text_to_file(os.path.join(root_dirs[i + 1], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) return link_lists From 0c4dbe8e02639de7787af8109df7781053101d2a Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 20 Aug 2024 14:39:12 +0200 Subject: [PATCH 052/152] first draft version of the restructured script to accommodate for the new file format --- .../chatbot_parser.py | 302 +++++++++++++++--- .../HPC_chatbot_preprocessor/requirements.txt | 3 +- 2 files changed, 255 insertions(+), 50 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index c2fe409b420..72aa40292f9 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ 
b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -4,9 +4,15 @@ import shutil import yaml from itertools import chain -from jinja2 import FileSystemLoader, Environment, ChoiceLoader +from pathlib import Path +from jinja2 import FileSystemLoader, Environment, ChoiceLoader, Template #################### define macro's #################### +# customizable macros +MIN_PARAGRAPH_LENGTH = 128 +MAX_TITLE_DEPTH = 4 +INCLUDE_LINKS_IN_PLAINTEXT = True + # directories PARSED_MDS = "parsed_mds" COPIES = "copies" @@ -24,10 +30,11 @@ LINUX = "linux" WINDOWS = "windows" MACOS = "macos" +GENERIC = "generic" # urls REPO_URL = 'https://github.com/hpcugent/vsc_user_docs' -DOCS_URL = "docs.hpc.ugent.be" +DOCS_URL = "https://docs.hpc.ugent.be" # OS-related if-states ACTIVE = "active" @@ -76,7 +83,7 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs # detect titles match = re.match(r'^#+ ', curr_line) - if match and len(match.group(0)) <= 5: + if match and len(match.group(0)) <= MAX_TITLE_DEPTH + 1: logic_output = len(match.group(0)) - 1 else: logic_output = 0 @@ -102,19 +109,37 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs os.makedirs(os.path.join(root_dirs[i], curr_dirs[logic_output]), exist_ok=True) # update the higher order current directories - for i in range(logic_output + 1, 4): + for i in range(logic_output + 1, MAX_TITLE_DEPTH + 1): curr_dirs[i] = curr_dirs[logic_output] return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs, link_lists -def replace_markdown_markers(curr_line, linklist, in_code_block): +def check_for_title_simple(line, in_code_block, curr_dirs): + + # detect titles + match = re.match(r'^#+ ', line) + if match and len(match.group(0)) <= 5 and not in_code_block: + title_length = len(match.group(0)) - 1 + curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 
1:-1].replace(' ', '-'))) + + # update the higher order current directories + for i in range(title_length + 1, MAX_TITLE_DEPTH + 1): + curr_dirs[i] = curr_dirs[title_length] + + return title_length + else: + return 0 + + +def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): """ function that replaces certain markdown structures with the equivalent used on the website :param curr_line: the current line on which markdown structures need to be replaced :param linklist: the list used to store links that need to be printed at the end of the file :param in_code_block: boolean indicating whether the current line is part of a code block + :param main_title: the main title of the file that is being processed :return curr_line: the adapted current line :return linklist: the updated linklist """ @@ -128,7 +153,13 @@ def replace_markdown_markers(curr_line, linklist, in_code_block): if matches: for match in matches: curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + str(len(linklist) + 1) + "]") - linklist.append(match[1]) + if ".md" not in match[1]: + if "#" not in match[1]: + linklist.append(match[1]) + else: + linklist.append(DOCS_URL + main_title + "/" + match[1]) + else: + linklist.append(DOCS_URL + match[1].replace(".md", "/").replace("index", "").rstrip("/")) # codeblock (with ``` -> always stands on a separate line, so line can be dropped) if '```' in curr_line: @@ -166,7 +197,6 @@ def replace_markdown_markers(curr_line, linklist, in_code_block): # keep the rest else: - # print("<" + content + ">") pass # structures with !!! 
(info, tips, warnings) @@ -199,6 +229,91 @@ def replace_markdown_markers(curr_line, linklist, in_code_block): return curr_line, linklist +def split_text(file, main_title): + + # start of assuming we haven't encountered a title + after_first_title = False + + # start of assuming we are not in a code_block + in_code_block = False + + # define initial dictionaries + paragraphs_text = {} + paragraphs_metadata = {} + + # list to keep track of links in the text + link_list = [] + + # list to keep track of the order of the subtitles + subtitle_order = [] + + # variable to keep track of the title level + title_level = 0 + + # list to keep track of most recent directories on each title level + if LINUX_TUTORIAL not in file: + curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)] + else: + curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(MAX_TITLE_DEPTH + 1)] + + with open(file, 'r') as readfile: + + for line in readfile: + + # keep track of title level and directory to write to metadata upon discovering a new subtitle + if title_level > 0: + last_title_level = title_level + last_dir = curr_dirs[last_title_level] + + title_level = check_for_title_simple(line, in_code_block, curr_dirs) + + # detect codeblocks to make sure titles aren't detected in them + if '```' in line or (('
' in line) ^ ('
' in line)): + in_code_block = not in_code_block + + # line is a title with a maximum depth of 4 + if title_level > 0: + if after_first_title: + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) + title = make_valid_title(line[title_level + 1:-1]) + + # create an entry for the file in the paragraphs text dictionary + paragraphs_text[title] = "" + + after_first_title = True + subtitle_order.append(title) + + # reset link_list + link_list = [] + + # line is not a title + elif after_first_title: + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) + if title in paragraphs_text.keys() and line != "\n": + paragraphs_text[title] += line + elif line != "\n": + paragraphs_text[title] = line + + # write metadata for the last file + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, title_level, curr_dirs[last_title_level]) + + return paragraphs_text, paragraphs_metadata, subtitle_order + + +def write_metadata(main_title, subtitle, links, title_level, directory): + + paragraph_metadata = {'main_title': main_title, 'subtitle': subtitle, 'title_depth': title_level, 'directory': directory} + + if len(links) > 0: + paragraph_metadata['links'] = {} + for i, link in enumerate(links): + paragraph_metadata['links'][str(i)] = link + + paragraph_metadata['parent_title'] = Path(directory).parent.name + + return paragraph_metadata + + def jinja_parser(filename, copy_location): """ function that let's jinja do its thing to format the files except for the os-related if-statements @@ -434,7 +549,7 @@ def write_text_to_file(file_name, curr_line, link_lists, in_code_block): os_list = [GENERIC_DIR, LINUX, WINDOWS, MACOS] for i, os_ in enumerate(os_list): if os_ in file_name: - curr_line, link_lists[i] = replace_markdown_markers(curr_line, link_lists[i], in_code_block) + curr_line, link_lists[i] = replace_markdown_markers(curr_line, link_lists[i], in_code_block, "placeholder") if 
CONTENT in data: data[CONTENT] += curr_line @@ -532,6 +647,66 @@ def make_valid_title(title): return valid_filename +def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number): + + # make the directory needed for the files that will be written + filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title]["directory"]) + os.makedirs(filepath) + + write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC) + + +def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number): + for i, OS in enumerate([LINUX, WINDOWS, MACOS]): + + # Unmangle if's to use jinja parser + paragraphs_text[title] = re.sub(IF_MANGLED_PART, "", paragraphs_text[title]) + + # Use jinja to render a different version of the text for each OS + template = Template(paragraphs_text[title]) + text = template.render(OS=OS) + + # define the filepath + filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"]) + os.makedirs(filepath) + + # write the files + write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS) + + +def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS): + # write text file + with open(os.path.join(filepath, paragraphs_metadata[title]["subtitle"] + ".txt"), 'w') as writefile: + writefile.write(text) + + # write metadata + metadata = paragraphs_metadata[title] + + if title_order_number != 0: + metadata["previous_title"] = title_order[title_order_number - 1] + else: + metadata["previous_title"] = None + + if title_order_number != len(title_order) - 1: + metadata["next_title"] = title_order[title_order_number + 1] + else: + metadata["next_title"] = None + + metadata["OS"] = OS + + if bool(LINUX_TUTORIAL in paragraphs_metadata[title]["directory"]): + linux_part = LINUX_TUTORIAL + "/" + else: + linux_part = "" + if OS == 
GENERIC: + os_part = "" + else: + os_part = OS + "/" + metadata["reference_link"] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title]["main_title"] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-') + + with open(os.path.join(filepath, paragraphs_metadata[title]["subtitle"] + "_metadata.json"), 'w') as writefile: + json.dump(metadata, writefile, indent=4) + def main(): """ main function @@ -557,21 +732,27 @@ def main(): ################### define loop-invariant variables ################### - # variable that keeps track of the source directories - source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), - os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] - - # list of all the filenames + # # variable that keeps track of the source directories + # source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), + # os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] + # + # # list of all the filenames + # filenames_generic = {} + # filenames_linux = {} + # for source_directory in source_directories: + # all_items = os.listdir(source_directory) + # files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + # for file in files: + # if LINUX_TUTORIAL in source_directory: + # filenames_linux[file] = os.path.join(source_directory, file) + # else: + # filenames_generic[file] = os.path.join(source_directory, file) + + # Temporary variables to test with just one singular file filenames_generic = {} filenames_linux = {} - for source_directory in source_directories: - all_items = os.listdir(source_directory) - files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - for file in files: - if LINUX_TUTORIAL in source_directory: - filenames_linux[file] = os.path.join(source_directory, file) - else: - 
filenames_generic[file] = os.path.join(source_directory, file) + filenames_generic["getting_started.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/getting_started.md" + filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" # for loops over all files for filenames in [filenames_generic, filenames_linux]: @@ -621,6 +802,10 @@ def main(): # dictionaries to keep track of current OS active_OS_if_states = {LINUX: INACTIVE, WINDOWS: INACTIVE, MACOS: INACTIVE} + # dictionaries to save the paragraphs and metadata before it is written to files + paragraphs_text = {} + paragraphs_metadata = {} + # variable that shows whether the first title has been reached yet after_first_title = False @@ -636,37 +821,56 @@ def main(): # process the jinja macros jinja_parser(filename, copy_file) - # open the file and store line by line in the right file - with open(copy_file, 'r') as readfile: - - for line in readfile: - title_level, title, directory, curr_dirs, link_lists = check_for_title(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block) + # split the text in paragraphs + paragraphs_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title) - # detect codeblocks to make sure titles aren't detected in them - if '```' in line or (('
' in line) ^ ('
' in line)): - in_code_block = not in_code_block + # for every section, either make the whole section generic, or create an os-specific file for each OS + for i, subtitle in enumerate(subtitle_order): - # line is a title with a maximum depth of 4 - if title_level > 0: - last_title = title - last_directory = directory - after_first_title = True + # generic + if IF_MANGLED_PART not in paragraphs_text[subtitle]: + write_generic_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i) - # line is not a title - elif after_first_title: - # check for if-statements and write the appropriate lines in the right files - next_action = check_if_statements(line, active_OS_if_states) - while next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE or next_action[0] == CHECK_EXTRA_MESSAGE: - if next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE: - link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block) - next_action = check_if_statements(next_action[1], active_OS_if_states) - - if next_action[0] == WRITE_TEXT: - link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block) - - # write end of file for the last file - for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): - write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title) + # os-specific + else: + write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i) + + + # # open the file and store line by line in the right file + # with open(copy_file, 'r') as readfile: + # + # for line in readfile: + # title_level, title, directory, curr_dirs, 
link_lists = check_for_title(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block) + # + # # detect codeblocks to make sure titles aren't detected in them + # if '```' in line or (('
' in line) ^ ('
' in line)): + # in_code_block = not in_code_block + # + # # line is a title with a maximum depth of 4 + # if title_level > 0: + # last_title = title + # last_directory = directory + # after_first_title = True + # + # # line is not a title + # elif after_first_title: + # # check for if-statements and write the appropriate lines in the right files + # next_action = check_if_statements(line, active_OS_if_states) + # while next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE or next_action[0] == CHECK_EXTRA_MESSAGE: + # if next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE: + # link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block) + # next_action = check_if_statements(next_action[1], active_OS_if_states) + # + # if next_action[0] == WRITE_TEXT: + # link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block) + # + # # write end of file for the last file + # for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): + # write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title) + + print(paragraphs_text) + print(paragraphs_metadata) + print(subtitle_order) # remove_directory_tree(COPIES) # remove_directory_tree(IF_MANGLED_FILES) diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt index 19ed8a2a29d..907f08fda77 100644 --- a/scripts/HPC_chatbot_preprocessor/requirements.txt +++ b/scripts/HPC_chatbot_preprocessor/requirements.txt @@ -3,4 +3,5 @@ re shutil pypandoc yaml -jinja2 \ No newline at end of file +jinja2 +pathlib \ No newline at end of file From 
38c45723441d13cde3c799f4aa76bff9f4093bfe Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 20 Aug 2024 14:42:28 +0200 Subject: [PATCH 053/152] added support to filter out collapsable admonitions --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 72aa40292f9..4c75df17af0 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -203,6 +203,10 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): if '!!!' in curr_line: curr_line = re.sub(r'!!!', "", curr_line) + # structures with ??? (collapsable admonitions) + if '???' in curr_line: + curr_line = re.sub(r'\?\?\?', "", curr_line) + # get rid of other markdown indicators (`, *, +, _) if not in_code_block: From 5cbd6533333b0226d812fec08c62b3001ba53ade Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 20 Aug 2024 16:49:51 +0200 Subject: [PATCH 054/152] attempt at fix for problems with jinja include, not working yet --- .../chatbot_parser.py | 51 ++++++++++++++++--- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 4c75df17af0..c1bd1a99b60 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -254,6 +254,9 @@ def split_text(file, main_title): # variable to keep track of the title level title_level = 0 + # variable to allow for if statements to "continue" over multiple paragraphs + open_ifs = "" + # list to keep track of most recent directories on each title level if LINUX_TUTORIAL not in file: curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)] @@ -278,11 +281,12 @@ def split_text(file, main_title): # line is a title with a maximum depth of 4 if title_level > 0: if 
after_first_title: + paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title]) paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) title = make_valid_title(line[title_level + 1:-1]) # create an entry for the file in the paragraphs text dictionary - paragraphs_text[title] = "" + paragraphs_text[title] = open_ifs after_first_title = True subtitle_order.append(title) @@ -318,6 +322,38 @@ def write_metadata(main_title, subtitle, links, title_level, directory): return paragraph_metadata +def close_ifs(text): + patterns = { + 'if': r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})', + 'endif': r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})', + 'else': r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})' + } + if_count = len(re.findall(patterns['if'], text.replace("\n", ""))) + endif_count = len(re.findall(patterns['endif'], text.replace("\n", ""))) + if IF_MANGLED_PART not in text or if_count == endif_count: + return text, "" + else: + + # Find all matches for each pattern + matches = [] + for key, pattern in patterns.items(): + for match in re.finditer(pattern, text): + matches.append(match) + + # sort the matches according to their start index + matches.sort(key=lambda x: x.start()) + + # extract the strings from the matches + open_ifs = [] + for match in matches: + open_ifs.append(match.group(0)) + + # Concatenate all matches into a single string + open_ifs = ''.join(open_ifs) + + return text + r'{' + IF_MANGLED_PART + '% endif %' + IF_MANGLED_PART + '}', open_ifs + + def jinja_parser(filename, copy_location): """ function that let's jinja do its thing to format the files except for the os-related if-statements @@ -380,6 +416,7 @@ def mangle_os_ifs(line, is_os): if_match = re.search(r'if ', match.group(1)) if_os_match = re.search(r'if OS ', match.group(1)) endif_match = re.search(r'endif', match.group(1)) + 
else_match = re.search(r'else', match.group(1)) # mangle positions pos_first_mangle = constr_match.start() + start_index + added_length + 1 @@ -416,7 +453,7 @@ def mangle_os_ifs(line, is_os): else: is_os = NON_OS_IF - else: + elif else_match: if is_os in (OS_IF, OS_IF_IN_OS_IF): line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling added_length += 2 * len(IF_MANGLED_PART) @@ -655,7 +692,7 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, # make the directory needed for the files that will be written filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title]["directory"]) - os.makedirs(filepath) + os.makedirs(filepath, exist_ok=True) write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC) @@ -672,7 +709,7 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or # define the filepath filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"]) - os.makedirs(filepath) + os.makedirs(filepath, exist_ok=True) # write the files write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS) @@ -761,6 +798,7 @@ def main(): # for loops over all files for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): + # print(filename) ################### define/reset loop specific variables ################### # variable that keeps track of whether file is part of the linux tutorial @@ -830,6 +868,7 @@ def main(): # for every section, either make the whole section generic, or create an os-specific file for each OS for i, subtitle in enumerate(subtitle_order): + # print(subtitle) # generic if IF_MANGLED_PART not in paragraphs_text[subtitle]: @@ -872,10 +911,6 @@ def main(): # for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): # write_end_of_file(os.path.join(root_dirs[i], 
last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title) - print(paragraphs_text) - print(paragraphs_metadata) - print(subtitle_order) - # remove_directory_tree(COPIES) # remove_directory_tree(IF_MANGLED_FILES) From 0e6f8b27f19c2256880960c9ee48b680045c5419 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 10:02:41 +0200 Subject: [PATCH 055/152] fixed an issue with jinja templates --- .../chatbot_parser.py | 60 ++++++++++++------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index c1bd1a99b60..e72dc0643de 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -5,7 +5,7 @@ import yaml from itertools import chain from pathlib import Path -from jinja2 import FileSystemLoader, Environment, ChoiceLoader, Template +from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template #################### define macro's #################### # customizable macros @@ -381,7 +381,7 @@ def jinja_parser(filename, copy_location): mangle_ifs(copy_location, filename) # Use Jinja2 to replace the macros - template_loader = ChoiceLoader([FileSystemLoader(searchpath=IF_MANGLED_FILES), FileSystemLoader(searchpath=os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR))]) + template_loader = ChoiceLoader([FileSystemLoader(searchpath=[IF_MANGLED_FILES, os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR)]), FunctionLoader(load_macros)]) templateEnv = Environment(loader=template_loader) template = templateEnv.get_template(filename) rendered_content = template.render(combined_context) @@ -391,6 +391,24 @@ def jinja_parser(filename, copy_location): output_file.write(rendered_content) +def load_macros(name): + """ + function used by the jinja FunctionLoader to retrieve templates from the macros folder 
since the normal FileSystemLoader can't locate them properly + + :param name: name of the package + :return: + """ + + macros_location = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, "macros") + + if "../macros/" in name: + package_name = name.split("../macros/")[1] + file_location = os.path.join(macros_location, package_name) + + with open(file_location, 'r') as readfile: + return readfile.read() + + def mangle_os_ifs(line, is_os): """ function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure. @@ -773,27 +791,27 @@ def main(): ################### define loop-invariant variables ################### - # # variable that keeps track of the source directories - # source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), - # os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] - # - # # list of all the filenames - # filenames_generic = {} - # filenames_linux = {} - # for source_directory in source_directories: - # all_items = os.listdir(source_directory) - # files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - # for file in files: - # if LINUX_TUTORIAL in source_directory: - # filenames_linux[file] = os.path.join(source_directory, file) - # else: - # filenames_generic[file] = os.path.join(source_directory, file) - - # Temporary variables to test with just one singular file + # variable that keeps track of the source directories + source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), + os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] + + # list of all the filenames filenames_generic = {} filenames_linux = {} - filenames_generic["getting_started.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/getting_started.md" - 
filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" + for source_directory in source_directories: + all_items = os.listdir(source_directory) + files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + for file in files: + if LINUX_TUTORIAL in source_directory: + filenames_linux[file] = os.path.join(source_directory, file) + else: + filenames_generic[file] = os.path.join(source_directory, file) + + # # Temporary variables to test with just one singular file + # filenames_generic = {} + # filenames_linux = {} + # filenames_generic["getting_started.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/getting_started.md" + # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" # for loops over all files for filenames in [filenames_generic, filenames_linux]: From cd778370a6cab55700d3e66745a049d9a644b3f9 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 10:31:10 +0200 Subject: [PATCH 056/152] added docstrings to new functions --- .../chatbot_parser.py | 79 +++++++++++++++++-- 1 file changed, 73 insertions(+), 6 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index e72dc0643de..938da0628c8 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -62,7 +62,7 @@ ################### define functions ################### -def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs, root_dirs, link_lists, is_linux_tutorial_, in_code_block_): +def check_for_title_xl(curr_line, main_title, last_directory, last_title, curr_dirs, root_dirs, link_lists, is_linux_tutorial_, in_code_block_): """ function that uses the check_for_title_logic function to create the 
appropriate directories and update the necessary variables @@ -115,8 +115,15 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs, link_lists -def check_for_title_simple(line, in_code_block, curr_dirs): +def check_for_title(line, in_code_block, curr_dirs): + """ + function that checks for titles in the current line. Used by split_text to split the text among the subtitles + :param line: the current line to be checked for a title + :param in_code_block: boolean indicating whether the current line is part of a codeblock to be sure comments aren't counted as titles + :param curr_dirs: the current working directories for each level of subtitle, to be updated when a new title is found + :return title_length: The amount of hashtags in front of the title on the current line + """ # detect titles match = re.match(r'^#+ ', line) if match and len(match.group(0)) <= 5 and not in_code_block: @@ -234,6 +241,14 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): def split_text(file, main_title): + """ + Function that splits the text into smaller sections and makes them into two dictionaries containing text and metadata + :param file: the filepath of the file to be split + :param main_title: the main title of the file + :return paragraphs_text: dictionary containing the split sections of text + :return paragraphs_metadata: dictionary containing the metadata of each split section of text + :return subtitle_order: list containing all encountered subtitles in order of appearance + """ # start of assuming we haven't encountered a title after_first_title = False @@ -272,7 +287,7 @@ def split_text(file, main_title): last_title_level = title_level last_dir = curr_dirs[last_title_level] - title_level = check_for_title_simple(line, in_code_block, curr_dirs) + title_level = check_for_title(line, 
in_code_block, curr_dirs) # detect codeblocks to make sure titles aren't detected in them if '```' in line or (('
' in line) ^ ('
' in line)): @@ -309,6 +324,16 @@ def split_text(file, main_title): def write_metadata(main_title, subtitle, links, title_level, directory): + """ + Function that writes metadata about a text section to a dictionary + + :param main_title: The main title of the file containing the section + :param subtitle: the title of the section + :param links: a list of links contained within the section + :param title_level: the depth of the title of the section + :param directory: the directory where the section will eventually be written (can either be generic or os-specific) + :return paragraph_metadata: dictionary containing the metadata about the section + """ paragraph_metadata = {'main_title': main_title, 'subtitle': subtitle, 'title_depth': title_level, 'directory': directory} @@ -323,6 +348,17 @@ def write_metadata(main_title, subtitle, links, title_level, directory): def close_ifs(text): + """ + Function to check whether all if-statements in a section are closed properly. If that is not the case, the function + closes all if-statements at the end of the section and returns a prefix for the next section containing all if-statements + of the section it is processing. This needs to be done because the start of the next section would also be contained within the + last unclosed if-statement of its previous section. 
+ + :param text: the text of the section it checks + :return text: the adapted text where all if-statements are closed + :return prefix: the prefix for the next section + """ + patterns = { 'if': r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})', 'endif': r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})', @@ -707,6 +743,16 @@ def make_valid_title(title): def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number): + """ + Function that writes text and metadata of a generic (non-os-specific) file + + :param title: title of section + :param paragraphs_text: dictionary containing all paragraphs of text + :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text + :param title_order: list containing all subtitles in order + :param title_order_number: order number of the title of the section that is being written + :return: + """ # make the directory needed for the files that will be written filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title]["directory"]) @@ -716,6 +762,16 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number): + """ + Function that writes text and metadata of os-specific files + + :param title: title of section + :param paragraphs_text: dictionary containing all paragraphs of text + :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text + :param title_order: list containing all subtitles in order + :param title_order_number: order number of the title of the section that is being written + :return: + """ for i, OS in enumerate([LINUX, WINDOWS, MACOS]): # Unmangle if's to use jinja parser @@ -734,6 +790,19 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or def write_files(title, text, 
paragraphs_metadata, title_order, title_order_number, filepath, OS): + """ + Function to write files to a certain filepath + + :param title: title of the section to be written + :param text: section of text to be written + :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text + :param title_order: list containing all subtitles in order + :param title_order_number: order number of the title of the section that is being written + :param filepath: filepath to write files to + :param OS: OS to be included in the metadata + :return: + """ + # write text file with open(os.path.join(filepath, paragraphs_metadata[title]["subtitle"] + ".txt"), 'w') as writefile: writefile.write(text) @@ -816,7 +885,6 @@ def main(): # for loops over all files for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): - # print(filename) ################### define/reset loop specific variables ################### # variable that keeps track of whether file is part of the linux tutorial @@ -896,12 +964,11 @@ def main(): else: write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i) - # # open the file and store line by line in the right file # with open(copy_file, 'r') as readfile: # # for line in readfile: - # title_level, title, directory, curr_dirs, link_lists = check_for_title(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block) + # title_level, title, directory, curr_dirs, link_lists = check_for_title_xl(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block) # # # detect codeblocks to make sure titles aren't detected in them # if '```' in line or (('
' in line) ^ ('
' in line)): From 98eb695790b30cfbde32c0b837a318ed11c88d59 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 11:24:35 +0200 Subject: [PATCH 057/152] only add necessary if-statements in front of non-if-complete sections --- .../chatbot_parser.py | 28 +++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 938da0628c8..e15fce4f049 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -384,10 +384,28 @@ def close_ifs(text): for match in matches: open_ifs.append(match.group(0)) + # only include the non-closed if-statements + changed = True + while changed: + changed = False + last_if = -1 + last_else = -1 + for i, if_part in enumerate(open_ifs): + if re.search(patterns['if'], if_part): + last_if = i + elif re.search(patterns['else'], if_part): + last_else = i + elif re.search(patterns['endif'], if_part): + changed = True + del open_ifs[i] + if last_else > last_if: + del open_ifs[last_else] + del open_ifs[last_if] + # Concatenate all matches into a single string open_ifs = ''.join(open_ifs) - return text + r'{' + IF_MANGLED_PART + '% endif %' + IF_MANGLED_PART + '}', open_ifs + return text + (r'{' + IF_MANGLED_PART + '% endif %' + IF_MANGLED_PART + '}')*(if_count - endif_count), open_ifs def jinja_parser(filename, copy_location): @@ -451,7 +469,7 @@ def mangle_os_ifs(line, is_os): We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements. :param line: the current line to check for os-related if-statements - :param is_os: variable keep track of the current os-state of the if-statements. Can be NON_OS_IF, NON_OS_IF_IN_OS_IF, OS_IF or OS_IF_IN_OS_IF + :param is_os: variable keep track of the current os-state of the if-statements. 
Can be NON_OS_IF, NON_OS_IF_IN_OS_IF, OS_IF or OS_IF_IN_OS_IF NON_OS_IF: not in an os-if NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if OS_IF: in an os-if @@ -492,7 +510,7 @@ def mangle_os_ifs(line, is_os): is_os = OS_IF elif is_os == NON_OS_IF_IN_OS_IF: is_os = OS_IF - + elif if_match: if if_os_match: line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling @@ -506,7 +524,7 @@ def mangle_os_ifs(line, is_os): is_os = NON_OS_IF_IN_OS_IF else: is_os = NON_OS_IF - + elif else_match: if is_os in (OS_IF, OS_IF_IN_OS_IF): line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling @@ -879,7 +897,7 @@ def main(): # # Temporary variables to test with just one singular file # filenames_generic = {} # filenames_linux = {} - # filenames_generic["getting_started.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/getting_started.md" + # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" # for loops over all files From 27457e371bdb494c06ac73a6cf4263a69d389631 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 12:12:45 +0200 Subject: [PATCH 058/152] fixed some more jinja problems --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index e15fce4f049..6bc9df169e3 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -401,6 +401,7 @@ def close_ifs(text): if last_else > last_if: del open_ifs[last_else] del open_ifs[last_if] + break # Concatenate all matches into a single string open_ifs = ''.join(open_ifs) @@ -795,10 
+796,16 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or # Unmangle if's to use jinja parser paragraphs_text[title] = re.sub(IF_MANGLED_PART, "", paragraphs_text[title]) + # slightly alter if-statements to be able to use predefined macros + paragraphs_text[title] = re.sub(OS, '"' + OS + '"', paragraphs_text[title]) + # Use jinja to render a different version of the text for each OS template = Template(paragraphs_text[title]) text = template.render(OS=OS) + # readjust text to correct overcorrections + text = re.sub('"' + OS + '"', OS, text) + # define the filepath filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"]) os.makedirs(filepath, exist_ok=True) From bb722876b2734c5a9deba84128fae6713e499652 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 12:33:18 +0200 Subject: [PATCH 059/152] implemented extra test to make sure generic files dont accidentally get flagged as os-specific --- .../chatbot_parser.py | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 6bc9df169e3..a7d2fd5b5e2 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -791,7 +791,8 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or :param title_order_number: order number of the title of the section that is being written :return: """ - for i, OS in enumerate([LINUX, WINDOWS, MACOS]): + text = {} + for OS in [LINUX, WINDOWS, MACOS]: # Unmangle if's to use jinja parser paragraphs_text[title] = re.sub(IF_MANGLED_PART, "", paragraphs_text[title]) @@ -801,17 +802,24 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or # Use jinja to render a different version of the text for each OS template = Template(paragraphs_text[title]) - text = 
template.render(OS=OS) + text[OS] = template.render(OS=OS) # readjust text to correct overcorrections - text = re.sub('"' + OS + '"', OS, text) - - # define the filepath - filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"]) - os.makedirs(filepath, exist_ok=True) - - # write the files - write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS) + text[OS] = re.sub('"' + OS + '"', OS, text[OS]) + + # check that not all versions are the same + unique_texts = set(text.values()) + if len(unique_texts) > 1: + for OS in [LINUX, WINDOWS, MACOS]: + # define the filepath + filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"]) + os.makedirs(filepath, exist_ok=True) + + # write the files + write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS) + else: + paragraphs_text[title] = text[OS] + write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number) def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS): From 67cb19e874e64d46564a8a5d34abc64c2a65e2a5 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 13:22:56 +0200 Subject: [PATCH 060/152] make sure empty os-specific files are not saved --- .../chatbot_parser.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index a7d2fd5b5e2..2c78ad90df3 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -809,17 +809,22 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or # check that not all versions are the same unique_texts = set(text.values()) - if len(unique_texts) > 1: - for OS in [LINUX, WINDOWS, MACOS]: - # define the filepath - filepath = 
os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"]) - os.makedirs(filepath, exist_ok=True) - - # write the files - write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS) - else: + if len(unique_texts) == 1: paragraphs_text[title] = text[OS] write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number) + else: + for OS in [LINUX, WINDOWS, MACOS]: + # check that file actually has some content + if len(text[OS]) > 0: + # define the filepath + filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"]) + os.makedirs(filepath, exist_ok=True) + + # write the files + write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS) + else: + # don't write empty files + pass def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS): From cf9834a25aa1ab3e690cefa76705888605afae2b Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 13:27:05 +0200 Subject: [PATCH 061/152] clean up unused code --- .../chatbot_parser.py | 294 ------------------ 1 file changed, 294 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 2c78ad90df3..51b4efa00b2 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -62,58 +62,6 @@ ################### define functions ################### -def check_for_title_xl(curr_line, main_title, last_directory, last_title, curr_dirs, root_dirs, link_lists, is_linux_tutorial_, in_code_block_): - """ - function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables - - :param curr_line: the line to be checked for a title - :param main_title: the main title of the file, needed in the case where a file is finished - :param 
last_directory: the most recently encountered directory - :param last_title: the most recently encountered title - :param curr_dirs: the most recent directories at each title level - :param root_dirs: a list containing the root directories - :param link_lists: a list containing all four link_lists with the links that will be printed at the bottom of a file - :param is_linux_tutorial_: boolean to indicate whether the current file is part of the linux tutorial - :param in_code_block_: boolean to indicate whether the current line is part of a codeblock - :return: the depth of the title - :return: the title found in the line if any - :return: the new directory in which the next file will be written - :return link_lists: updated link_lists - """ - - # detect titles - match = re.match(r'^#+ ', curr_line) - if match and len(match.group(0)) <= MAX_TITLE_DEPTH + 1: - logic_output = len(match.group(0)) - 1 - else: - logic_output = 0 - - # make necessary changes if a title has been detected - if logic_output == 0 or in_code_block_: - return 0, None, None, curr_dirs, link_lists - else: - - # if a new title is detected, write the end of the previous file - if last_title is not None: - for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): - write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial_, main_title, last_title) - - # reset the link lists for each OS - for i in range(4): - link_lists[i] = [] - - # make a new directory corresponding with the new title - curr_dirs[logic_output] = os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-'))) - - for i in range(4): - os.makedirs(os.path.join(root_dirs[i], curr_dirs[logic_output]), exist_ok=True) - - # update the higher order current directories - for i in range(logic_output + 1, MAX_TITLE_DEPTH + 1): - curr_dirs[i] = curr_dirs[logic_output] - - return logic_output, make_valid_title(curr_line[logic_output + 
1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs, link_lists - def check_for_title(line, in_code_block, curr_dirs): """ @@ -554,191 +502,6 @@ def mangle_ifs(directory, filename): write_file.write(new_line) -def check_if_statements(curr_line, active_OS_if_states): - """ - function that checks for if-statements - - :param curr_line: the line to be checked for if-statements to build the directory structure - :param active_OS_if_states: dictionary keeping track of the active OS states according to the if-statements - :return: the next action to be done with the line: - DONE: An if-statement has been found at the start of the line, the active os list has been updated, processing of the current line is finished and a following line can be processed. - CHECK_EXTRA_MESSAGE: An if-statement has been found at the start of the line, the active os list has been updated, more text has been detected after the if-statement that also needs to be checked. - WRITE_TEXT: No if-statement has been found, write the current line to a file (can also be part of the current line) - WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE: An if statement has been found not at the start of the line. Firstly, write the text up until the if-statement to a file, then check the rest of the line. 
- :return: the extra message to be checked, if any - :return: the text to be written to the file, if any - """ - # check whether the first part of the line contains information wrt if-statements - match = re.search(r'^\{' + IF_MANGLED_PART + '%(.*?)%' + IF_MANGLED_PART + '}(.*)', curr_line) - - # check whether the line contains information wrt if-statements that is not in its first part - match_large = re.search(r'^(.*)(\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '})(.*)', curr_line) - - if match: - content = match.group(1) - - # new if-statement wrt OS with '==' - if re.search(r'if OS == ', content): - OS = content.split()[-1] - - # set new active OS - active_OS_if_states[OS] = ACTIVE - - # set other active ones on inactive - for other_OS in active_OS_if_states.keys(): - if other_OS != OS and active_OS_if_states[other_OS] == ACTIVE: - active_OS_if_states[other_OS] = INACTIVE - - # new if-statement wrt OS with '!=' - elif re.search(r'if OS != ', content): - OS = content.split()[-1] - - # set new active OS - active_OS_if_states[OS] = INACTIVE - - # set other inactive ones on active - for other_OS in active_OS_if_states.keys(): - if other_OS != OS and active_OS_if_states[other_OS] == INACTIVE: - active_OS_if_states[other_OS] = ACTIVE - - # endif statement wrt OS - elif re.search(r'endif', content): - if str(1) in active_OS_if_states.values(): - active_OS_if_states[ - list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = ACTIVE - else: - for key in active_OS_if_states.keys(): - active_OS_if_states[key] = INACTIVE - - # else statement wrt OS - elif re.search(r'else', content): - - i = 0 - for i in range(3): - if str(i) not in active_OS_if_states.values(): - break - - # set the previously active one on inactive until the next endif - key_list = list(active_OS_if_states.keys()) - position = list(active_OS_if_states.values()).index(ACTIVE) - active_OS_if_states[key_list[position]] = str(i) - - # set inactive ones on active - while 
INACTIVE in active_OS_if_states.values(): - position = list(active_OS_if_states.values()).index(INACTIVE) - active_OS_if_states[key_list[position]] = ACTIVE - - if len(match.group(2)) != 0: - extra_message = match.group(2).lstrip() - return CHECK_EXTRA_MESSAGE, extra_message, None - - else: - return DONE, None, None - - elif match_large: - return WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE, match_large.group(2), match_large.group(1) - - else: - return WRITE_TEXT, None, curr_line - - -def write_text_to_file(file_name, curr_line, link_lists, in_code_block): - """ - function that writes a line to a file - - :param file_name: target file to write the line to - :param curr_line: line to be written to the file - :param link_lists: list containing all the links that will be printed at the end of files - :param in_code_block: boolean indicating whether the current line is in a codeblock - :return link_lists: updated link_lists - """ - - if os.path.exists(file_name) or curr_line.strip(): - if os.path.exists(file_name): - with open(file_name, "r") as read_file: - data = json.load(read_file) - else: - data = {} - - os_list = [GENERIC_DIR, LINUX, WINDOWS, MACOS] - for i, os_ in enumerate(os_list): - if os_ in file_name: - curr_line, link_lists[i] = replace_markdown_markers(curr_line, link_lists[i], in_code_block, "placeholder") - - if CONTENT in data: - data[CONTENT] += curr_line - else: - data[CONTENT] = curr_line - - with open(file_name, "w") as write_file: - json.dump(data, write_file, indent=4) - - return link_lists - - -def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, last_title, root_dirs, link_lists, in_code_block): - """ - function that decides what file to write text to - - :param curr_line: line to be written to a file - :param active_OS_if_states: dictionary keeping track of which OSes are active according to the if-statements - :param last_directory: most recently made directory - :param last_title: the most recently encountered title - :param 
root_dirs: a list with all root directories - :param link_lists: list of links that need to be written at the end of the files - :param in_code_block: boolean indicating whether the current line is in a code block - :return link_lists: an updated link_lists - """ - # check that the line is part of the website for gent - if active_OS_if_states[LINUX] == INACTIVE and active_OS_if_states[WINDOWS] == INACTIVE and active_OS_if_states[MACOS] == INACTIVE: - link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) - else: - for i, os_ in enumerate([LINUX, WINDOWS, MACOS]): - if active_OS_if_states[os_] == ACTIVE: - link_lists = write_text_to_file(os.path.join(root_dirs[i + 1], last_directory, last_title + ".json"), - curr_line, link_lists, in_code_block) - - return link_lists - - -def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_, main_title, last_title): - """ - function that adds the links that should be at the end of a file - - :param file_location: the location of the file - :param OS: the OS of the file - :param linklist: the links that should be at the end of the file - :param is_linux_tutorial_: boolean indicating whether the file is part of the linux tutorial - :param main_title: the main title of the file, to be used in the reference link - :param last_title: the most recently encountered title - :return: - """ - - if os.path.exists(file_location): - - if len(OS) > 0: - OS = OS + "/" - - with open(file_location, "r") as read_file: - data = json.load(read_file) - - # add the links from within the document - data[LINKS] = {} - for i, link in enumerate(linklist): - data[LINKS][str(i + 1)] = str(link) - - if is_linux_tutorial_: - linux_part = LINUX_TUTORIAL + "/" - else: - linux_part = "" - - # add the reference link - data[REFERENCE_LINK] = (DOCS_URL + "/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == 
'-').strip('-')) - - with open(file_location, 'w') as write_file: - json.dump(data, write_file, indent=4) - - def make_valid_title(title): """ function that makes sure all titles can be used as valid filenames @@ -946,7 +709,6 @@ def main(): root_dir_os_specific_linux = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, LINUX) root_dir_os_specific_windows = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS) root_dir_os_specific_macos = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, MACOS) - root_dirs = [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos] # variable for the main title (needed for reference links) main_title = filename[:-3] @@ -954,30 +716,6 @@ def main(): # variable that keeps track of the directories that are used to write in at different levels curr_dirs = [filename[:-3] for _ in range(5)] - # variable that keeps track of the latest non-zero level title and corresponding directory - last_title = None - last_directory = None - - # list to keep track of links in the text - links_generic = [] - links_linux = [] - links_windows = [] - links_macos = [] - link_lists = [links_generic, links_linux, links_windows, links_macos] - - # dictionaries to keep track of current OS - active_OS_if_states = {LINUX: INACTIVE, WINDOWS: INACTIVE, MACOS: INACTIVE} - - # dictionaries to save the paragraphs and metadata before it is written to files - paragraphs_text = {} - paragraphs_metadata = {} - - # variable that shows whether the first title has been reached yet - after_first_title = False - - # variable that is used to be sure that we are detecting titles and not comments from codeblocks - in_code_block = False - ################### actually parse the md file ################### # create directories for the source markdown file @@ -1002,38 +740,6 @@ def main(): else: write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i) - # # open the file and store line by line in the right file - # with 
open(copy_file, 'r') as readfile: - # - # for line in readfile: - # title_level, title, directory, curr_dirs, link_lists = check_for_title_xl(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block) - # - # # detect codeblocks to make sure titles aren't detected in them - # if '```' in line or (('
' in line) ^ ('
' in line)): - # in_code_block = not in_code_block - # - # # line is a title with a maximum depth of 4 - # if title_level > 0: - # last_title = title - # last_directory = directory - # after_first_title = True - # - # # line is not a title - # elif after_first_title: - # # check for if-statements and write the appropriate lines in the right files - # next_action = check_if_statements(line, active_OS_if_states) - # while next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE or next_action[0] == CHECK_EXTRA_MESSAGE: - # if next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE: - # link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block) - # next_action = check_if_statements(next_action[1], active_OS_if_states) - # - # if next_action[0] == WRITE_TEXT: - # link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block) - # - # # write end of file for the last file - # for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): - # write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title) - # remove_directory_tree(COPIES) # remove_directory_tree(IF_MANGLED_FILES) From da32459088fd4bcb0b665df5ab3b24464a585925 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 13:43:04 +0200 Subject: [PATCH 062/152] introduce more macros --- .../chatbot_parser.py | 70 +++++++++++-------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 51b4efa00b2..91165d97429 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ 
b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -25,6 +25,7 @@ EXTRA_DIR = "extra" GENERIC_DIR = "generic" OS_SPECIFIC_DIR = "os_specific" +MACROS = "macros" # OSes LINUX = "linux" @@ -55,11 +56,23 @@ CHECK_EXTRA_MESSAGE = "check_extra_message" WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE = "write_text_and_check_extra_message" -# JSON attributes -CONTENT = "content" +# Metadata attributes +MAIN_TITLE = "main_title" +SUBTITLE = "subtitle" +TITLE_DEPTH = "title_depth" +DIRECTORY = "directory" LINKS = "links" +PARENT_TITLE = "parent_title" +PREVIOUS_TITLE = "previous_title" +NEXT_TITLE = "next_title" +METADATA_OS = "OS" REFERENCE_LINK = "reference_link" +# if-structure components +IF = "if" +ELSE = "else" +ENDIF = "endif" + ################### define functions ################### @@ -283,14 +296,14 @@ def write_metadata(main_title, subtitle, links, title_level, directory): :return paragraph_metadata: dictionary containing the metadata about the section """ - paragraph_metadata = {'main_title': main_title, 'subtitle': subtitle, 'title_depth': title_level, 'directory': directory} + paragraph_metadata = {MAIN_TITLE: main_title, SUBTITLE: subtitle, TITLE_DEPTH: title_level, DIRECTORY: directory} if len(links) > 0: - paragraph_metadata['links'] = {} + paragraph_metadata[LINKS] = {} for i, link in enumerate(links): - paragraph_metadata['links'][str(i)] = link + paragraph_metadata[LINKS][str(i)] = link - paragraph_metadata['parent_title'] = Path(directory).parent.name + paragraph_metadata[PARENT_TITLE] = Path(directory).parent.name return paragraph_metadata @@ -308,12 +321,12 @@ def close_ifs(text): """ patterns = { - 'if': r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})', - 'endif': r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})', - 'else': r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})' + IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + 
'})', + ENDIF: r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})', + ELSE: r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})' } - if_count = len(re.findall(patterns['if'], text.replace("\n", ""))) - endif_count = len(re.findall(patterns['endif'], text.replace("\n", ""))) + if_count = len(re.findall(patterns[IF], text.replace("\n", ""))) + endif_count = len(re.findall(patterns[ENDIF], text.replace("\n", ""))) if IF_MANGLED_PART not in text or if_count == endif_count: return text, "" else: @@ -339,11 +352,11 @@ def close_ifs(text): last_if = -1 last_else = -1 for i, if_part in enumerate(open_ifs): - if re.search(patterns['if'], if_part): + if re.search(patterns[IF], if_part): last_if = i - elif re.search(patterns['else'], if_part): + elif re.search(patterns[ELSE], if_part): last_else = i - elif re.search(patterns['endif'], if_part): + elif re.search(patterns[ENDIF], if_part): changed = True del open_ifs[i] if last_else > last_if: @@ -402,10 +415,10 @@ def load_macros(name): :return: """ - macros_location = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, "macros") + macros_location = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, MACROS) - if "../macros/" in name: - package_name = name.split("../macros/")[1] + if "../" + MACROS + "/" in name: + package_name = name.split("../" + MACROS + "/")[1] file_location = os.path.join(macros_location, package_name) with open(file_location, 'r') as readfile: @@ -537,7 +550,7 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, """ # make the directory needed for the files that will be written - filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title]["directory"]) + filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) os.makedirs(filepath, exist_ok=True) write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC) 
@@ -580,7 +593,7 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or # check that file actually has some content if len(text[OS]) > 0: # define the filepath - filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"]) + filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title][DIRECTORY]) os.makedirs(filepath, exist_ok=True) # write the files @@ -605,25 +618,25 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe """ # write text file - with open(os.path.join(filepath, paragraphs_metadata[title]["subtitle"] + ".txt"), 'w') as writefile: + with open(os.path.join(filepath, paragraphs_metadata[title][SUBTITLE] + ".txt"), 'w') as writefile: writefile.write(text) # write metadata metadata = paragraphs_metadata[title] if title_order_number != 0: - metadata["previous_title"] = title_order[title_order_number - 1] + metadata[PREVIOUS_TITLE] = title_order[title_order_number - 1] else: - metadata["previous_title"] = None + metadata[PREVIOUS_TITLE] = None if title_order_number != len(title_order) - 1: - metadata["next_title"] = title_order[title_order_number + 1] + metadata[NEXT_TITLE] = title_order[title_order_number + 1] else: - metadata["next_title"] = None + metadata[NEXT_TITLE] = None - metadata["OS"] = OS + metadata[METADATA_OS] = OS - if bool(LINUX_TUTORIAL in paragraphs_metadata[title]["directory"]): + if bool(LINUX_TUTORIAL in paragraphs_metadata[title][DIRECTORY]): linux_part = LINUX_TUTORIAL + "/" else: linux_part = "" @@ -631,11 +644,12 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe os_part = "" else: os_part = OS + "/" - metadata["reference_link"] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title]["main_title"] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-') + metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + 
paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-') - with open(os.path.join(filepath, paragraphs_metadata[title]["subtitle"] + "_metadata.json"), 'w') as writefile: + with open(os.path.join(filepath, paragraphs_metadata[title][SUBTITLE] + "_metadata.json"), 'w') as writefile: json.dump(metadata, writefile, indent=4) + def main(): """ main function From 093200b232c1c6ed5c10530ec6a09717b2aaf263 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 13:48:54 +0200 Subject: [PATCH 063/152] reintroduce logic to remove unnecessary directories --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 91165d97429..8e3141c4b52 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -656,9 +656,9 @@ def main(): :return: """ # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason - shutil.rmtree(PARSED_MDS) - shutil.rmtree(COPIES) - shutil.rmtree(IF_MANGLED_FILES) + shutil.rmtree(PARSED_MDS, ignore_errors=True) + shutil.rmtree(COPIES, ignore_errors=True) + shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) # make the necessary directories if not os.path.exists(COPIES): @@ -754,8 +754,8 @@ def main(): else: write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i) - # remove_directory_tree(COPIES) - # remove_directory_tree(IF_MANGLED_FILES) + shutil.rmtree(COPIES, ignore_errors=True) + shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) ################### run the script ################### From 5d0ffe951e515ee3fb890b82a2431332e92b3d4a Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 14:34:45 +0200 Subject: [PATCH 064/152] added functionality 
to include links or leave them out --- .../chatbot_parser.py | 75 ++++++++++++++----- .../HPC_chatbot_preprocessor/requirements.txt | 1 + 2 files changed, 56 insertions(+), 20 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 8e3141c4b52..33ddefbdbbf 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -1,3 +1,4 @@ +import copy import json import os import re @@ -73,6 +74,10 @@ ELSE = "else" ENDIF = "endif" +# link indicators +LINK_BEFORE = r'Ā§linkĀ§linkĀ§' +LINK_AFTER = r'Ā§linkĀ§linkĀ§' + ################### define functions ################### @@ -120,14 +125,14 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) if matches: for match in matches: - curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + str(len(linklist) + 1) + "]") + curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_BEFORE + str(len(linklist)) + LINK_AFTER) if ".md" not in match[1]: if "#" not in match[1]: linklist.append(match[1]) else: - linklist.append(DOCS_URL + main_title + "/" + match[1]) + linklist.append(DOCS_URL + "/" + main_title + "/" + match[1]) else: - linklist.append(DOCS_URL + match[1].replace(".md", "/").replace("index", "").rstrip("/")) + linklist.append(DOCS_URL + "/" + match[1].replace(".md", "/").replace("index", "").rstrip("/")) # codeblock (with ``` -> always stands on a separate line, so line can be dropped) if '```' in curr_line: @@ -617,13 +622,17 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe :return: """ + metadata = copy.deepcopy(paragraphs_metadata[title]) + # write text file with open(os.path.join(filepath, paragraphs_metadata[title][SUBTITLE] + ".txt"), 'w') as writefile: - writefile.write(text) + if LINKS in paragraphs_metadata[title].keys(): + 
adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS]) + writefile.write(adapted_text) + else: + writefile.write(text) # write metadata - metadata = paragraphs_metadata[title] - if title_order_number != 0: metadata[PREVIOUS_TITLE] = title_order[title_order_number - 1] else: @@ -650,6 +659,32 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe json.dump(metadata, writefile, indent=4) +def insert_links(text, links): + """ + Function that inserts links in the plaintext or takes out the references to the links depending on the value of INCLUDE_LINKS_IN_PLAINTEXT + + :param text: The plaintext that needs to be adapted + :param links: The links that might need to be inserted + :return text: The adapted plaintext + :return links: The links that were actually present in the text + """ + + present_links = [] + new_links = {} + for link_number in re.finditer(LINK_BEFORE + r'([0-9]*?)' + LINK_AFTER, text): + present_links.append(link_number.group(1)) + if INCLUDE_LINKS_IN_PLAINTEXT: + text = re.sub(LINK_BEFORE + link_number.group(1) + LINK_AFTER, " " + links[link_number.group(1)] + " ", text) + else: + text = re.sub(LINK_BEFORE + link_number.group(1) + LINK_AFTER, "", text) + + for link_number in links.keys(): + if link_number in present_links: + new_links[len(new_links.keys())] = links[link_number] + + return text, new_links + + def main(): """ main function @@ -679,22 +714,22 @@ def main(): source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] - # list of all the filenames - filenames_generic = {} - filenames_linux = {} - for source_directory in source_directories: - all_items = os.listdir(source_directory) - files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - for file in files: - if LINUX_TUTORIAL in source_directory: - filenames_linux[file] = 
os.path.join(source_directory, file) - else: - filenames_generic[file] = os.path.join(source_directory, file) - - # # Temporary variables to test with just one singular file + # # list of all the filenames # filenames_generic = {} # filenames_linux = {} - # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" + # for source_directory in source_directories: + # all_items = os.listdir(source_directory) + # files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + # for file in files: + # if LINUX_TUTORIAL in source_directory: + # filenames_linux[file] = os.path.join(source_directory, file) + # else: + # filenames_generic[file] = os.path.join(source_directory, file) + + # Temporary variables to test with just one singular file + filenames_generic = {} + filenames_linux = {} + filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" # for loops over all files diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt index 907f08fda77..3b118535f3b 100644 --- a/scripts/HPC_chatbot_preprocessor/requirements.txt +++ b/scripts/HPC_chatbot_preprocessor/requirements.txt @@ -1,3 +1,4 @@ +copy os re shutil From a3e34a97d0fec915d199b4be0b0a9a62f4b4be4f Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 14:37:16 +0200 Subject: [PATCH 065/152] added functionality to include links or leave them out --- scripts/HPC_chatbot_preprocessor/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt index 3b118535f3b..907f08fda77 100644 --- a/scripts/HPC_chatbot_preprocessor/requirements.txt +++ 
b/scripts/HPC_chatbot_preprocessor/requirements.txt @@ -1,4 +1,3 @@ -copy os re shutil From 7c6154b47023062d4b7b6ff5932b60ccb63d56c3 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 15:36:39 +0200 Subject: [PATCH 066/152] adapt filenames to allow for splitting on something other than subtitles --- .../chatbot_parser.py | 60 +++++++++++-------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 33ddefbdbbf..5c31199d731 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -12,7 +12,7 @@ # customizable macros MIN_PARAGRAPH_LENGTH = 128 MAX_TITLE_DEPTH = 4 -INCLUDE_LINKS_IN_PLAINTEXT = True +INCLUDE_LINKS_IN_PLAINTEXT = False # directories PARSED_MDS = "parsed_mds" @@ -59,7 +59,7 @@ # Metadata attributes MAIN_TITLE = "main_title" -SUBTITLE = "subtitle" +SUBTITLE = "subtitle (incorrect in some cases, working on a fix)" TITLE_DEPTH = "title_depth" DIRECTORY = "directory" LINKS = "links" @@ -542,7 +542,7 @@ def make_valid_title(title): return valid_filename -def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number): +def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers): """ Function that writes text and metadata of a generic (non-os-specific) file @@ -551,6 +551,7 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text :param title_order: list containing all subtitles in order :param title_order_number: order number of the title of the section that is being written + :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS :return: """ @@ -558,10 +559,10 @@ def write_generic_file(title, 
paragraphs_text, paragraphs_metadata, title_order, filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) os.makedirs(filepath, exist_ok=True) - write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC) + write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC, paragraph_numbers=paragraph_numbers) -def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number): +def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers): """ Function that writes text and metadata of os-specific files @@ -570,6 +571,7 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text :param title_order: list containing all subtitles in order :param title_order_number: order number of the title of the section that is being written + :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS :return: """ text = {} @@ -592,7 +594,7 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or unique_texts = set(text.values()) if len(unique_texts) == 1: paragraphs_text[title] = text[OS] - write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number) + write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers) else: for OS in [LINUX, WINDOWS, MACOS]: # check that file actually has some content @@ -602,13 +604,13 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or os.makedirs(filepath, exist_ok=True) # write the files - write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS) + 
write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS, paragraph_numbers=paragraph_numbers) else: # don't write empty files pass -def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS): +def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, paragraph_numbers): """ Function to write files to a certain filepath @@ -619,13 +621,14 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe :param title_order_number: order number of the title of the section that is being written :param filepath: filepath to write files to :param OS: OS to be included in the metadata + :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS :return: """ metadata = copy.deepcopy(paragraphs_metadata[title]) # write text file - with open(os.path.join(filepath, paragraphs_metadata[title][SUBTITLE] + ".txt"), 'w') as writefile: + with open(os.path.join(filepath, paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS]) + ".txt"), 'w') as writefile: if LINKS in paragraphs_metadata[title].keys(): adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS]) writefile.write(adapted_text) @@ -655,9 +658,11 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe os_part = OS + "/" metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-') - with open(os.path.join(filepath, paragraphs_metadata[title][SUBTITLE] + "_metadata.json"), 'w') as writefile: + with open(os.path.join(filepath, paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS]) + "_metadata.json"), 'w') as writefile: json.dump(metadata, writefile, indent=4) + paragraph_numbers[OS] += 1 
+ def insert_links(text, links): """ @@ -714,22 +719,22 @@ def main(): source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] - # # list of all the filenames - # filenames_generic = {} - # filenames_linux = {} - # for source_directory in source_directories: - # all_items = os.listdir(source_directory) - # files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - # for file in files: - # if LINUX_TUTORIAL in source_directory: - # filenames_linux[file] = os.path.join(source_directory, file) - # else: - # filenames_generic[file] = os.path.join(source_directory, file) - - # Temporary variables to test with just one singular file + # list of all the filenames filenames_generic = {} filenames_linux = {} - filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" + for source_directory in source_directories: + all_items = os.listdir(source_directory) + files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + for file in files: + if LINUX_TUTORIAL in source_directory: + filenames_linux[file] = os.path.join(source_directory, file) + else: + filenames_generic[file] = os.path.join(source_directory, file) + + # # Temporary variables to test with just one singular file + # filenames_generic = {} + # filenames_linux = {} + # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" # for loops over all files @@ -765,6 +770,9 @@ def main(): # variable that keeps track of the directories that are used to write in at different levels curr_dirs = [filename[:-3] for _ in range(5)] + # dictionary that keeps track of the paragraph 
numbers + paragraph_numbers = {GENERIC: 1, LINUX: 1, WINDOWS: 1, MACOS: 1} + ################### actually parse the md file ################### # create directories for the source markdown file @@ -783,11 +791,11 @@ def main(): # generic if IF_MANGLED_PART not in paragraphs_text[subtitle]: - write_generic_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i) + write_generic_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers) # os-specific else: - write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i) + write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers) shutil.rmtree(COPIES, ignore_errors=True) shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) From 8d5b50dc727e284917eb1540d91f692f56ff8a4a Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 16:19:57 +0200 Subject: [PATCH 067/152] making some changes to prepare to add paragraph level splitting tomorrow --- .../chatbot_parser.py | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 5c31199d731..742522e6e70 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -13,6 +13,7 @@ MIN_PARAGRAPH_LENGTH = 128 MAX_TITLE_DEPTH = 4 INCLUDE_LINKS_IN_PLAINTEXT = False +DEEP_DIRECTORIES = True # directories PARSED_MDS = "parsed_mds" @@ -64,8 +65,8 @@ DIRECTORY = "directory" LINKS = "links" PARENT_TITLE = "parent_title" -PREVIOUS_TITLE = "previous_title" -NEXT_TITLE = "next_title" +PREVIOUS_SUBTITLE = "previous_title" +NEXT_SUBTITLE = "next_title" METADATA_OS = "OS" REFERENCE_LINK = "reference_link" @@ -75,8 +76,7 @@ ENDIF = "endif" # link indicators -LINK_BEFORE = r'Ā§linkĀ§linkĀ§' -LINK_AFTER = r'Ā§linkĀ§linkĀ§' +LINK_MARKER = r'Ā§linkĀ§linkĀ§' ################### 
define functions ################### @@ -94,11 +94,12 @@ def check_for_title(line, in_code_block, curr_dirs): match = re.match(r'^#+ ', line) if match and len(match.group(0)) <= 5 and not in_code_block: title_length = len(match.group(0)) - 1 - curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 1:-1].replace(' ', '-'))) + if DEEP_DIRECTORIES: + curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 1:-1].replace(' ', '-'))) - # update the higher order current directories - for i in range(title_length + 1, MAX_TITLE_DEPTH + 1): - curr_dirs[i] = curr_dirs[title_length] + # update the higher order current directories + for i in range(title_length + 1, MAX_TITLE_DEPTH + 1): + curr_dirs[i] = curr_dirs[title_length] return title_length else: @@ -125,7 +126,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) if matches: for match in matches: - curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_BEFORE + str(len(linklist)) + LINK_AFTER) + curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_MARKER + str(len(linklist)) + LINK_MARKER) if ".md" not in match[1]: if "#" not in match[1]: linklist.append(match[1]) @@ -676,12 +677,12 @@ def insert_links(text, links): present_links = [] new_links = {} - for link_number in re.finditer(LINK_BEFORE + r'([0-9]*?)' + LINK_AFTER, text): + for link_number in re.finditer(LINK_MARKER + r'([0-9]*?)' + LINK_MARKER, text): present_links.append(link_number.group(1)) if INCLUDE_LINKS_IN_PLAINTEXT: - text = re.sub(LINK_BEFORE + link_number.group(1) + LINK_AFTER, " " + links[link_number.group(1)] + " ", text) + text = re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, " " + links[link_number.group(1)] + " ", text) else: - text = re.sub(LINK_BEFORE + link_number.group(1) + LINK_AFTER, "", text) + text = 
re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, "", text) for link_number in links.keys(): if link_number in present_links: From 0c10376f1f3d5ea56f3ddc32fa580ff436413a73 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 16:20:41 +0200 Subject: [PATCH 068/152] making some changes to prepare to add paragraph level splitting tomorrow --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 742522e6e70..1c13edc93e3 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -13,6 +13,8 @@ MIN_PARAGRAPH_LENGTH = 128 MAX_TITLE_DEPTH = 4 INCLUDE_LINKS_IN_PLAINTEXT = False +SPLIT_ON_TITLES = True +SPLIT_ON_PARAGRAPHS = False DEEP_DIRECTORIES = True # directories From f8ee8607545a5638de94787bb00046226e19cce0 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 16:30:55 +0200 Subject: [PATCH 069/152] making some changes to prepare to add paragraph level splitting tomorrow --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 1c13edc93e3..561e112d28e 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -640,14 +640,14 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe # write metadata if title_order_number != 0: - metadata[PREVIOUS_TITLE] = title_order[title_order_number - 1] + metadata[PREVIOUS_SUBTITLE] = title_order[title_order_number - 1] else: - metadata[PREVIOUS_TITLE] = None + metadata[PREVIOUS_SUBTITLE] = None if title_order_number != len(title_order) - 1: - metadata[NEXT_TITLE] = title_order[title_order_number + 1] + 
metadata[NEXT_SUBTITLE] = title_order[title_order_number + 1] else: - metadata[NEXT_TITLE] = None + metadata[NEXT_SUBTITLE] = None metadata[METADATA_OS] = OS From 6533733a4d462db37544251af7a9d33697ad63bb Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 17:04:10 +0200 Subject: [PATCH 070/152] adapted the parsing script to allow for testing in a semi-efficient way --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 561e112d28e..43cb93c5c08 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -531,7 +531,7 @@ def make_valid_title(title): :return valid_filename: the adapted title that can be used as filename """ # Define a regex pattern for invalid characters on both Windows and Linux - invalid_chars = r'[<>:"/\\|?*\0()]' + invalid_chars = r'[<>:"/\\|?*\0]' # get rid of extra information between {} brackets title = re.sub(r'\{.*?}', '', title) @@ -805,6 +805,7 @@ def main(): ################### run the script ################### -print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") -main() -print("Parsing finished successfully") +if __name__ == '__main__': + print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") + main() + print("Parsing finished successfully") From 2e7a00f1b724e77249caef30c62e8aa6c6c9f628 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 17:04:35 +0200 Subject: [PATCH 071/152] added test for make_valid_title --- .../tests/test_make_valid_title.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py new file mode 100644 index 00000000000..f3c423ed9c3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py @@ -0,0 +1,20 @@ +import pytest +import shutil +from chatbot_parser import make_valid_title + + +@pytest.mark.parametrize("input_string,expected", [ + ("", ""), + ("A-good-filename-with-dashes", "A-good-filename-with-dashes"), + (" A very good filename beginning and ending in a space ", "A very good filename beginning and ending in a space"), + ("-A-very-good-filename-beginning-and-ending-in-a-dash-", "A-very-good-filename-beginning-and-ending-in-a-dash"), + ("A filename containing bad characters <>:\"/\\|?*\0", "A filename containing bad characters"), + ("A filename ending with {some jinja garbage}", "A filename ending with") +]) +def test_make_valid_title(input_string, expected): + assert make_valid_title(input_string) == expected + + +shutil.rmtree("parsed_mds", ignore_errors=True) +shutil.rmtree("copies", ignore_errors=True) +shutil.rmtree("if_mangled_files", ignore_errors=True) From f5e0579fb6a83f1a8e643fc5b1b77309080bf0e3 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 17:07:49 +0200 Subject: [PATCH 072/152] removed useless lines from testscript --- .../HPC_chatbot_preprocessor/tests/test_make_valid_title.py | 5 ----- 
1 file changed, 5 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py index f3c423ed9c3..aebecddd0f3 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py @@ -13,8 +13,3 @@ ]) def test_make_valid_title(input_string, expected): assert make_valid_title(input_string) == expected - - -shutil.rmtree("parsed_mds", ignore_errors=True) -shutil.rmtree("copies", ignore_errors=True) -shutil.rmtree("if_mangled_files", ignore_errors=True) From 6757b4f5eba4a105a1b5b94c6a9c720c25e74f2a Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 11:08:12 +0200 Subject: [PATCH 073/152] First attempt at splitting in paragraphs (need for other fixes for title-based-split first --- .../chatbot_parser.py | 45 +++++++++++++++---- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 43cb93c5c08..a148e7b2bbd 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -10,12 +10,12 @@ #################### define macro's #################### # customizable macros -MIN_PARAGRAPH_LENGTH = 128 +MIN_PARAGRAPH_LENGTH = 160 MAX_TITLE_DEPTH = 4 INCLUDE_LINKS_IN_PLAINTEXT = False SPLIT_ON_TITLES = True -SPLIT_ON_PARAGRAPHS = False -DEEP_DIRECTORIES = True +SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES +DEEP_DIRECTORIES = True and SPLIT_ON_TITLES # Should always be False if SPLIT_ON_TITLES is False # directories PARSED_MDS = "parsed_mds" @@ -219,8 +219,10 @@ def split_text(file, main_title): :return subtitle_order: list containing all encountered subtitles in order of appearance """ - # start of assuming we haven't encountered a title + # start of assuming we haven't encountered a title and the first paragraph hasn't appeared 
yet after_first_title = False + after_first_paragraph = False + paragraph_number = 1 # start of assuming we are not in a code_block in_code_block = False @@ -241,6 +243,12 @@ def split_text(file, main_title): # variable to allow for if statements to "continue" over multiple paragraphs open_ifs = "" + # initialise the first paragraph if SPLIT_ON_PARAGRAPH is True + if SPLIT_ON_PARAGRAPHS: + title = main_title + "_paragraph_" + str(paragraph_number) + paragraphs_text[title] = "" + subtitle_order.append(title) + # list to keep track of most recent directories on each title level if LINUX_TUTORIAL not in file: curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)] @@ -258,18 +266,18 @@ def split_text(file, main_title): title_level = check_for_title(line, in_code_block, curr_dirs) - # detect codeblocks to make sure titles aren't detected in them + # detect codeblocks to make sure titles and beginnings of paragraphs aren't detected in them if '```' in line or (('
' in line) ^ ('
' in line)): in_code_block = not in_code_block # line is a title with a maximum depth of 4 - if title_level > 0: + if title_level > 0 and SPLIT_ON_TITLES: if after_first_title: paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title]) paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) title = make_valid_title(line[title_level + 1:-1]) - # create an entry for the file in the paragraphs text dictionary + # create an entry for the next file in the paragraphs text dictionary paragraphs_text[title] = open_ifs after_first_title = True @@ -278,8 +286,27 @@ def split_text(file, main_title): # reset link_list link_list = [] - # line is not a title - elif after_first_title: + elif title_level > 0 and not SPLIT_ON_TITLES: + paragraphs_text[title] += line[title_level + 1:] + + elif SPLIT_ON_PARAGRAPHS and line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", paragraphs_text[title])) >= MIN_PARAGRAPH_LENGTH: + # finish the previous file + paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title]) + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) + + # start a new file + paragraph_number += 1 + title = make_valid_title(main_title + "_paragraph_" + str(paragraph_number)) + subtitle_order.append(title) + + # create an entry for the next file in the paragraphs text dictionary + paragraphs_text[title] = open_ifs + + # reset link_list + link_list = [] + + # line is not a title or the ending of a sufficiently large paragraph + elif after_first_title or SPLIT_ON_PARAGRAPHS: line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) if title in paragraphs_text.keys() and line != "\n": paragraphs_text[title] += line From 6d9558d1ccf2dd9950586d50b167d74637120e26 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 11:21:42 +0200 Subject: [PATCH 074/152] make two functions for different ways 
of dividing the text --- .../chatbot_parser.py | 96 +++++++++++++++---- 1 file changed, 80 insertions(+), 16 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index a148e7b2bbd..1f6b82e8a44 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -219,10 +219,15 @@ def split_text(file, main_title): :return subtitle_order: list containing all encountered subtitles in order of appearance """ - # start of assuming we haven't encountered a title and the first paragraph hasn't appeared yet + if SPLIT_ON_TITLES: + return split_on_titles(file, main_title) + elif SPLIT_ON_PARAGRAPHS: + return split_on_paragraphs(file, main_title) + + +def split_on_titles(file, main_title): + # start of assuming we haven't encountered a title after_first_title = False - after_first_paragraph = False - paragraph_number = 1 # start of assuming we are not in a code_block in_code_block = False @@ -243,12 +248,6 @@ def split_text(file, main_title): # variable to allow for if statements to "continue" over multiple paragraphs open_ifs = "" - # initialise the first paragraph if SPLIT_ON_PARAGRAPH is True - if SPLIT_ON_PARAGRAPHS: - title = main_title + "_paragraph_" + str(paragraph_number) - paragraphs_text[title] = "" - subtitle_order.append(title) - # list to keep track of most recent directories on each title level if LINUX_TUTORIAL not in file: curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)] @@ -266,18 +265,19 @@ def split_text(file, main_title): title_level = check_for_title(line, in_code_block, curr_dirs) - # detect codeblocks to make sure titles and beginnings of paragraphs aren't detected in them + # detect codeblocks to make sure titles aren't detected in them if '```' in line or (('
' in line) ^ ('
' in line)): in_code_block = not in_code_block # line is a title with a maximum depth of 4 - if title_level > 0 and SPLIT_ON_TITLES: + if title_level > 0: if after_first_title: paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title]) - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, + last_dir) title = make_valid_title(line[title_level + 1:-1]) - # create an entry for the next file in the paragraphs text dictionary + # create an entry for the file in the paragraphs text dictionary paragraphs_text[title] = open_ifs after_first_title = True @@ -286,10 +286,74 @@ def split_text(file, main_title): # reset link_list link_list = [] - elif title_level > 0 and not SPLIT_ON_TITLES: + # line is not a title + elif after_first_title: + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) + if title in paragraphs_text.keys() and line != "\n": + paragraphs_text[title] += line + elif line != "\n": + paragraphs_text[title] = line + + # write metadata for the last file + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, title_level, curr_dirs[last_title_level]) + + return paragraphs_text, paragraphs_metadata, subtitle_order + + +def split_on_paragraphs(file, main_title): + # start of assuming we haven't encountered a title and the first paragraph hasn't appeared yet + after_first_title = False + + # first paragraph number + paragraph_number = 1 + + # start of assuming we are not in a code_block + in_code_block = False + + # define initial dictionaries + paragraphs_text = {} + paragraphs_metadata = {} + + # list to keep track of links in the text + link_list = [] + + # list to keep track of the order of the subtitles + subtitle_order = [] + + # variable to keep track of the title level + title_level = 0 + + # initialise the first paragraph + title = main_title + 
"_paragraph_" + str(paragraph_number) + paragraphs_text[title] = "" + subtitle_order.append(title) + + # list to keep track of most recent directories on each title level + if LINUX_TUTORIAL not in file: + curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)] + else: + curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(MAX_TITLE_DEPTH + 1)] + + with open(file, 'r') as readfile: + + for line in readfile: + + # keep track of title level and directory to write to metadata upon discovering a new subtitle + if title_level > 0: + last_title_level = title_level + last_dir = curr_dirs[last_title_level] + + title_level = check_for_title(line, in_code_block, curr_dirs) + + # detect codeblocks to make sure titles and beginnings of paragraphs aren't detected in them + if '```' in line or (('
' in line) ^ ('
' in line)): + in_code_block = not in_code_block + + # line is a title with a maximum depth of 4 + if title_level > 0: paragraphs_text[title] += line[title_level + 1:] - elif SPLIT_ON_PARAGRAPHS and line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", paragraphs_text[title])) >= MIN_PARAGRAPH_LENGTH: + elif line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", paragraphs_text[title])) >= MIN_PARAGRAPH_LENGTH: # finish the previous file paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title]) paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) @@ -306,7 +370,7 @@ def split_text(file, main_title): link_list = [] # line is not a title or the ending of a sufficiently large paragraph - elif after_first_title or SPLIT_ON_PARAGRAPHS: + else: line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) if title in paragraphs_text.keys() and line != "\n": paragraphs_text[title] += line From 2c7025a8994fb0a2b0733be82185e706d1109fe9 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 11:23:06 +0200 Subject: [PATCH 075/152] added docstrings to new functions --- .../chatbot_parser.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 1f6b82e8a44..a9797026428 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -212,6 +212,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): def split_text(file, main_title): """ Function that splits the text into smaller sections and makes them into two dictionaries containing text and metadata + :param file: the filepath of the file to be split :param main_title: the main title of the file :return paragraphs_text: dictionary containing the split 
sections of text @@ -226,6 +227,15 @@ def split_text(file, main_title): def split_on_titles(file, main_title): + """ + Function that splits the text into smaller sections based on the subtitle structure and makes them into two dictionaries containing text and metadata + + :param file: the filepath of the file to be split + :param main_title: the main title of the file + :return paragraphs_text: dictionary containing the split sections of text + :return paragraphs_metadata: dictionary containing the metadata of each split section of text + :return subtitle_order: list containing all encountered subtitles in order of appearance + """ # start of assuming we haven't encountered a title after_first_title = False @@ -301,6 +311,15 @@ def split_on_titles(file, main_title): def split_on_paragraphs(file, main_title): + """ + Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata + + :param file: the filepath of the file to be split + :param main_title: the main title of the file + :return paragraphs_text: dictionary containing the split sections of text + :return paragraphs_metadata: dictionary containing the metadata of each split section of text + :return subtitle_order: list containing all encountered subtitles in order of appearance + """ # start of assuming we haven't encountered a title and the first paragraph hasn't appeared yet after_first_title = False From ae99bb96f830da927f4dcded46d12404af8d16c1 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 12:10:18 +0200 Subject: [PATCH 076/152] update test for valid titles --- .../HPC_chatbot_preprocessor/tests/test_make_valid_title.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py index aebecddd0f3..fc704c84b31 100644 --- 
a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py @@ -6,10 +6,10 @@ @pytest.mark.parametrize("input_string,expected", [ ("", ""), ("A-good-filename-with-dashes", "A-good-filename-with-dashes"), - (" A very good filename beginning and ending in a space ", "A very good filename beginning and ending in a space"), + (" A very good filename beginning and ending in a space ", "A-very-good-filename-beginning-and-ending-in-a-space"), ("-A-very-good-filename-beginning-and-ending-in-a-dash-", "A-very-good-filename-beginning-and-ending-in-a-dash"), - ("A filename containing bad characters <>:\"/\\|?*\0", "A filename containing bad characters"), - ("A filename ending with {some jinja garbage}", "A filename ending with") + ("A filename containing bad characters <>:\"/\\|?*\0", "A-filename-containing-bad-characters"), + ("A filename ending with {some jinja garbage}", "A-filename-ending-with") ]) def test_make_valid_title(input_string, expected): assert make_valid_title(input_string) == expected From 084b4210a261e89c81d8e23d31d3e0d1adb7f00b Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 14:17:24 +0200 Subject: [PATCH 077/152] fixed problem with splitting os-specific text (metadata not fixed yet) --- .../chatbot_parser.py | 169 ++++++++++++------ 1 file changed, 117 insertions(+), 52 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index a9797026428..5739f23fb31 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -80,6 +80,13 @@ # link indicators LINK_MARKER = r'Ā§linkĀ§linkĀ§' +# regex patterns +IF_MANGLED_PATTERNS = { + IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})', + ELSE: r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})', + ENDIF: r'({' + IF_MANGLED_PART 
+ r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})' + } + ################### define functions ################### @@ -243,9 +250,13 @@ def split_on_titles(file, main_title): in_code_block = False # define initial dictionaries - paragraphs_text = {} + paragraphs_os_free_text = {} + paragraphs_os_text = {} paragraphs_metadata = {} + # variable to keep track of the current paragraph + current_paragraph = "" + # list to keep track of links in the text link_list = [] @@ -258,6 +269,12 @@ def split_on_titles(file, main_title): # variable to allow for if statements to "continue" over multiple paragraphs open_ifs = "" + # variable to keep track of how many if-statements deep the current line is + in_if_statement = 0 + + # variable to indicate that previous section was one with if-statements + previous_contained_if = False + # list to keep track of most recent directories on each title level if LINUX_TUTORIAL not in file: curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)] @@ -268,46 +285,63 @@ def split_on_titles(file, main_title): for line in readfile: - # keep track of title level and directory to write to metadata upon discovering a new subtitle - if title_level > 0: - last_title_level = title_level - last_dir = curr_dirs[last_title_level] + # detect if-statements starting or ending on the current line + in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len(re.findall(IF_MANGLED_PATTERNS[ENDIF], line)) - title_level = check_for_title(line, in_code_block, curr_dirs) + # only split up if current line is in a fully non-os-specific section + if in_if_statement == 0: - # detect codeblocks to make sure titles aren't detected in them - if '```' in line or (('
' in line) ^ ('
' in line)): - in_code_block = not in_code_block + title_level = check_for_title(line, in_code_block, curr_dirs) - # line is a title with a maximum depth of 4 - if title_level > 0: - if after_first_title: - paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title]) - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, - last_dir) - title = make_valid_title(line[title_level + 1:-1]) + # detect codeblocks to make sure titles aren't detected in them + if '```' in line or (('
' in line) ^ ('
' in line)): + in_code_block = not in_code_block - # create an entry for the file in the paragraphs text dictionary - paragraphs_text[title] = open_ifs + # line is a title with a maximum depth of 4 + if title_level > 0: + if after_first_title: + if previous_contained_if: + paragraphs_os_text[title] = current_paragraph + else: + paragraphs_os_free_text[title] = current_paragraph + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) + title = make_valid_title(line[title_level + 1:-1]) - after_first_title = True - subtitle_order.append(title) + # create an entry for the file in the paragraphs text dictionary + current_paragraph = open_ifs - # reset link_list - link_list = [] + after_first_title = True + subtitle_order.append(title) + + # reset link_list + link_list = [] + + previous_contained_if = False - # line is not a title - elif after_first_title: + # line is not a title + elif after_first_title: + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) + if line != "\n": + current_paragraph += line + + # keep track of title level and directory to write to metadata upon discovering a new subtitle + if title_level > 0: + last_title_level = title_level + last_dir = curr_dirs[last_title_level] + else: + previous_contained_if = True line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) - if title in paragraphs_text.keys() and line != "\n": - paragraphs_text[title] += line - elif line != "\n": - paragraphs_text[title] = line + if line != "\n": + current_paragraph += line - # write metadata for the last file - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, title_level, curr_dirs[last_title_level]) + # write dictionaries for the last file + if previous_contained_if: + paragraphs_os_text[title] = current_paragraph + else: + paragraphs_os_free_text[title] = current_paragraph + paragraphs_metadata[title] = write_metadata(main_title, 
title, link_list, last_title_level, curr_dirs[last_title_level]) - return paragraphs_text, paragraphs_metadata, subtitle_order + return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order def split_on_paragraphs(file, main_title): @@ -438,20 +472,15 @@ def close_ifs(text): :return prefix: the prefix for the next section """ - patterns = { - IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})', - ENDIF: r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})', - ELSE: r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})' - } - if_count = len(re.findall(patterns[IF], text.replace("\n", ""))) - endif_count = len(re.findall(patterns[ENDIF], text.replace("\n", ""))) + if_count = len(re.findall(IF_MANGLED_PATTERNS[IF], text.replace("\n", ""))) + endif_count = len(re.findall(IF_MANGLED_PATTERNS[ENDIF], text.replace("\n", ""))) if IF_MANGLED_PART not in text or if_count == endif_count: return text, "" else: # Find all matches for each pattern matches = [] - for key, pattern in patterns.items(): + for key, pattern in IF_MANGLED_PATTERNS.items(): for match in re.finditer(pattern, text): matches.append(match) @@ -470,11 +499,11 @@ def close_ifs(text): last_if = -1 last_else = -1 for i, if_part in enumerate(open_ifs): - if re.search(patterns[IF], if_part): + if re.search(IF_MANGLED_PATTERNS[IF], if_part): last_if = i - elif re.search(patterns[ELSE], if_part): + elif re.search(IF_MANGLED_PATTERNS[ELSE], if_part): last_else = i - elif re.search(patterns[ENDIF], if_part): + elif re.search(IF_MANGLED_PATTERNS[ENDIF], if_part): changed = True del open_ifs[i] if last_else > last_if: @@ -650,7 +679,7 @@ def make_valid_title(title): valid_filename = re.sub(invalid_chars, '', title) # Strip leading/trailing whitespace - valid_filename = valid_filename.strip().strip('-') + valid_filename = valid_filename.strip().strip('-').replace(' ', '-') return valid_filename @@ -700,7 
+729,7 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or template = Template(paragraphs_text[title]) text[OS] = template.render(OS=OS) - # readjust text to correct overcorrections + # re-adjust text to correct overcorrections text[OS] = re.sub('"' + OS + '"', OS, text[OS]) # check that not all versions are the same @@ -740,8 +769,11 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe metadata = copy.deepcopy(paragraphs_metadata[title]) + file_title = paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS]) + file_title = title + # write text file - with open(os.path.join(filepath, paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS]) + ".txt"), 'w') as writefile: + with open(os.path.join(filepath, file_title + ".txt"), 'w') as writefile: if LINKS in paragraphs_metadata[title].keys(): adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS]) writefile.write(adapted_text) @@ -771,7 +803,7 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe os_part = OS + "/" metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-') - with open(os.path.join(filepath, paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS]) + "_metadata.json"), 'w') as writefile: + with open(os.path.join(filepath, file_title + "_metadata.json"), 'w') as writefile: json.dump(metadata, writefile, indent=4) paragraph_numbers[OS] += 1 @@ -803,6 +835,39 @@ def insert_links(text, links): return text, new_links +def split_and_write_os_specific_section(text, metadata, subtitle_order, i, paragraph_numbers): + # add first subtitle in front of section again + text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + text + + # Unmangle if's 
to use jinja parser + text = re.sub(IF_MANGLED_PART, "", text) + + for OS in [LINUX, WINDOWS, MACOS]: + + # slightly alter if-statements to be able to use predefined macros + text = re.sub(OS, '"' + OS + '"', text) + + # Use jinja to render a different version of the text for each OS + template = Template(text) + jinja_text = template.render(OS=OS) + + # re-adjust text to correct overcorrections + jinja_text = re.sub('"' + OS + '"', OS, jinja_text) + + with open("jinja_file.txt", 'w') as writefile: + writefile.write(jinja_text) + + # split in right way + _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text("jinja_file.txt", metadata[MAIN_TITLE]) + + # write to files + for os_i, os_subtitle in enumerate(os_subtitle_order): + filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) + os.makedirs(filepath, exist_ok=True) + + write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, subtitle_order[:i] + os_subtitle_order + subtitle_order[i+1:], os_i + i, filepath, OS, paragraph_numbers) + + def main(): """ main function @@ -828,7 +893,7 @@ def main(): ################### define loop-invariant variables ################### - # variable that keeps track of the source directories + # constant that keeps track of the source directories source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] @@ -848,6 +913,7 @@ def main(): # filenames_generic = {} # filenames_linux = {} # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" + # filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md" # filenames_linux["beyond_the_basics.md"] = 
"C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" # for loops over all files @@ -896,19 +962,18 @@ def main(): jinja_parser(filename, copy_file) # split the text in paragraphs - paragraphs_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title) + paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title) # for every section, either make the whole section generic, or create an os-specific file for each OS for i, subtitle in enumerate(subtitle_order): - # print(subtitle) # generic - if IF_MANGLED_PART not in paragraphs_text[subtitle]: - write_generic_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers) + if subtitle in paragraphs_os_free_text.keys(): + write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers) # os-specific else: - write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers) + split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers) shutil.rmtree(COPIES, ignore_errors=True) shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) From cf7f5f0c8a56303c155aea51268abe6ddbfe2944 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 15:33:50 +0200 Subject: [PATCH 078/152] fix for metadata of os-specific sections --- .../chatbot_parser.py | 93 +++++++++++++------ 1 file changed, 65 insertions(+), 28 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 5739f23fb31..10c61725244 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -62,7 +62,7 @@ # Metadata attributes MAIN_TITLE = "main_title" -SUBTITLE = "subtitle (incorrect in some cases, working on a fix)" 
+SUBTITLE = "subtitle" TITLE_DEPTH = "title_depth" DIRECTORY = "directory" LINKS = "links" @@ -300,11 +300,17 @@ def split_on_titles(file, main_title): # line is a title with a maximum depth of 4 if title_level > 0: if after_first_title: + + # write text of previous file if previous_contained_if: paragraphs_os_text[title] = current_paragraph else: paragraphs_os_free_text[title] = current_paragraph + + # write metadata of previous file paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) + + # make a new title title = make_valid_title(line[title_level + 1:-1]) # create an entry for the file in the paragraphs text dictionary @@ -697,11 +703,15 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, :return: """ - # make the directory needed for the files that will be written - filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) - os.makedirs(filepath, exist_ok=True) + if len(paragraphs_text[title]) > 0: + # make the directory needed for the files that will be written + filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) + os.makedirs(filepath, exist_ok=True) - write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC, paragraph_numbers=paragraph_numbers) + write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC, paragraph_numbers=paragraph_numbers) + else: + # don't write empty files + pass def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers): @@ -835,7 +845,7 @@ def insert_links(text, links): return text, new_links -def split_and_write_os_specific_section(text, metadata, subtitle_order, i, paragraph_numbers): +def split_and_write_os_specific_section(text, metadata, subtitle_order, i, paragraph_numbers, all_metadata): # add first 
subtitle in front of section again text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + text @@ -860,12 +870,39 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, i, parag # split in right way _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text("jinja_file.txt", metadata[MAIN_TITLE]) + # prepare variables to fix metadata + total_subtitle_order = subtitle_order[:i] + os_subtitle_order + subtitle_order[i+1:] + copy_all_metadata = {**os_specific_metadata, **all_metadata} + # write to files for os_i, os_subtitle in enumerate(os_subtitle_order): - filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) - os.makedirs(filepath, exist_ok=True) + # check that file actually has some content + if len(os_specific_text[os_subtitle]) > 0: + # add the links to the metadata + os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS] + + # fix parent in the metadata + parent_i = 0 + parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1 + parent = os_specific_metadata[os_subtitle][MAIN_TITLE] + while total_subtitle_order[parent_i] != os_subtitle and parent_i != len(total_subtitle_order): + if copy_all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth: + parent = total_subtitle_order[parent_i] + parent_i += 1 + os_specific_metadata[os_subtitle][PARENT_TITLE] = parent + + # fix directory in the metadata + os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(copy_all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE]) + + # make a directory to save the files + filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) + os.makedirs(filepath, exist_ok=True) - write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, subtitle_order[:i] + os_subtitle_order + subtitle_order[i+1:], os_i + i, filepath, OS, paragraph_numbers) + # write to files + 
write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + i, filepath, OS, paragraph_numbers) + else: + # don't write empty files + pass def main(): @@ -893,27 +930,27 @@ def main(): ################### define loop-invariant variables ################### - # constant that keeps track of the source directories - source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), - os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] - - # list of all the filenames - filenames_generic = {} - filenames_linux = {} - for source_directory in source_directories: - all_items = os.listdir(source_directory) - files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - for file in files: - if LINUX_TUTORIAL in source_directory: - filenames_linux[file] = os.path.join(source_directory, file) - else: - filenames_generic[file] = os.path.join(source_directory, file) - - # # Temporary variables to test with just one singular file + # # constant that keeps track of the source directories + # source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), + # os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] + # + # # list of all the filenames # filenames_generic = {} # filenames_linux = {} + # for source_directory in source_directories: + # all_items = os.listdir(source_directory) + # files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + # for file in files: + # if LINUX_TUTORIAL in source_directory: + # filenames_linux[file] = os.path.join(source_directory, file) + # else: + # filenames_generic[file] = os.path.join(source_directory, file) + + # Temporary variables to test with just one singular file + filenames_generic = {} + filenames_linux = {} # filenames_generic["account.md"] = 
"C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" - # filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md" + filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md" # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" # for loops over all files @@ -973,7 +1010,7 @@ def main(): # os-specific else: - split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers) + split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers, paragraphs_metadata) shutil.rmtree(COPIES, ignore_errors=True) shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) From b7c10d3c2764ad91880c5c17aa60d14cd337bc51 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 16:03:52 +0200 Subject: [PATCH 079/152] clean up temporary version --- .../chatbot_parser.py | 115 +++++++----------- 1 file changed, 42 insertions(+), 73 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 10c61725244..5c1a4b3facd 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -714,54 +714,6 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, pass -def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers): - """ - Function that writes text and metadata of os-specific files - - :param title: title of section - :param paragraphs_text: dictionary containing all paragraphs of text - :param paragraphs_metadata: 
dictionary containing the metadata for all paragraphs of text - :param title_order: list containing all subtitles in order - :param title_order_number: order number of the title of the section that is being written - :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS - :return: - """ - text = {} - for OS in [LINUX, WINDOWS, MACOS]: - - # Unmangle if's to use jinja parser - paragraphs_text[title] = re.sub(IF_MANGLED_PART, "", paragraphs_text[title]) - - # slightly alter if-statements to be able to use predefined macros - paragraphs_text[title] = re.sub(OS, '"' + OS + '"', paragraphs_text[title]) - - # Use jinja to render a different version of the text for each OS - template = Template(paragraphs_text[title]) - text[OS] = template.render(OS=OS) - - # re-adjust text to correct overcorrections - text[OS] = re.sub('"' + OS + '"', OS, text[OS]) - - # check that not all versions are the same - unique_texts = set(text.values()) - if len(unique_texts) == 1: - paragraphs_text[title] = text[OS] - write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers) - else: - for OS in [LINUX, WINDOWS, MACOS]: - # check that file actually has some content - if len(text[OS]) > 0: - # define the filepath - filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title][DIRECTORY]) - os.makedirs(filepath, exist_ok=True) - - # write the files - write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS, paragraph_numbers=paragraph_numbers) - else: - # don't write empty files - pass - - def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, paragraph_numbers): """ Function to write files to a certain filepath @@ -845,7 +797,18 @@ def insert_links(text, links): return text, new_links -def split_and_write_os_specific_section(text, metadata, subtitle_order, i, paragraph_numbers, 
all_metadata): +def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, paragraph_numbers, all_metadata): + """ + Function that splits os-specific sections into subtitles, parses them using jinja and writes them away + + :param text: full os specific section + :param metadata: metadata generated for the full os specific section + :param subtitle_order: order of the subtitles generated by the splitter + :param title_order_number: order number of the section + :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS + :param all_metadata: all metadata generated by the splitter + :return: + """ # add first subtitle in front of section again text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + text @@ -871,35 +834,39 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, i, parag _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text("jinja_file.txt", metadata[MAIN_TITLE]) # prepare variables to fix metadata - total_subtitle_order = subtitle_order[:i] + os_subtitle_order + subtitle_order[i+1:] - copy_all_metadata = {**os_specific_metadata, **all_metadata} + total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:] + all_metadata.update(os_specific_metadata) # write to files for os_i, os_subtitle in enumerate(os_subtitle_order): # check that file actually has some content if len(os_specific_text[os_subtitle]) > 0: # add the links to the metadata - os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS] + if LINKS in metadata.keys(): + os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS] # fix parent in the metadata parent_i = 0 parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1 parent = os_specific_metadata[os_subtitle][MAIN_TITLE] while total_subtitle_order[parent_i] != os_subtitle and parent_i != len(total_subtitle_order): - if 
copy_all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth: + if all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth: parent = total_subtitle_order[parent_i] parent_i += 1 os_specific_metadata[os_subtitle][PARENT_TITLE] = parent # fix directory in the metadata - os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(copy_all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE]) + if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]: + os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE]) + else: + os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE]) # make a directory to save the files filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) os.makedirs(filepath, exist_ok=True) # write to files - write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + i, filepath, OS, paragraph_numbers) + write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, paragraph_numbers) else: # don't write empty files pass @@ -930,27 +897,27 @@ def main(): ################### define loop-invariant variables ################### - # # constant that keeps track of the source directories - # source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), - # os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] - # - # # list of all the filenames - # filenames_generic = {} - # filenames_linux = {} - # for source_directory in source_directories: - # all_items = os.listdir(source_directory) - # files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - # for file in files: - # if LINUX_TUTORIAL in 
source_directory: - # filenames_linux[file] = os.path.join(source_directory, file) - # else: - # filenames_generic[file] = os.path.join(source_directory, file) - - # Temporary variables to test with just one singular file + # constant that keeps track of the source directories + source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), + os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] + + # list of all the filenames filenames_generic = {} filenames_linux = {} + for source_directory in source_directories: + all_items = os.listdir(source_directory) + files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + for file in files: + if LINUX_TUTORIAL in source_directory: + filenames_linux[file] = os.path.join(source_directory, file) + else: + filenames_generic[file] = os.path.join(source_directory, file) + + # # Temporary variables to test with just one singular file + # filenames_generic = {} + # filenames_linux = {} # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" - filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md" + # filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md" # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" # for loops over all files @@ -1012,8 +979,10 @@ def main(): else: split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers, paragraphs_metadata) + # clean up temporary directories and files shutil.rmtree(COPIES, ignore_errors=True) shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) + 
os.remove("jinja_file.txt") ################### run the script ################### From 4a441f34ca1ad6f296817fface9c86fe76585250 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 16:23:57 +0200 Subject: [PATCH 080/152] added command line options for custom macros --- .../chatbot_parser.py | 32 +++++++++++++++---- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 5c1a4b3facd..e15a76318c4 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -1,3 +1,4 @@ +import argparse import copy import json import os @@ -9,13 +10,7 @@ from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template #################### define macro's #################### -# customizable macros -MIN_PARAGRAPH_LENGTH = 160 -MAX_TITLE_DEPTH = 4 -INCLUDE_LINKS_IN_PLAINTEXT = False -SPLIT_ON_TITLES = True -SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES -DEEP_DIRECTORIES = True and SPLIT_ON_TITLES # Should always be False if SPLIT_ON_TITLES is False +# customizable macros (default values are defined at the bottom of the script) # directories PARSED_MDS = "parsed_mds" @@ -987,6 +982,29 @@ def main(): ################### run the script ################### if __name__ == '__main__': + parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot") + + # adding command-line options + + parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)") + parser.add_argument("-pl", "--paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)") + parser.add_argument("-td", "--title_depth", type=int, 
default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)") + parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") + + args = parser.parse_args() + + SPLIT_ON_TITLES = bool(args.split_on_titles) + MIN_PARAGRAPH_LENGTH = args.paragraph_length + MAX_TITLE_DEPTH = args.title_depth + INCLUDE_LINKS_IN_PLAINTEXT = args.links + SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES + DEEP_DIRECTORIES = True and SPLIT_ON_TITLES # Should always be False if SPLIT_ON_TITLES is False + + print(SPLIT_ON_TITLES) + print(MIN_PARAGRAPH_LENGTH) + print(MAX_TITLE_DEPTH) + print(INCLUDE_LINKS_IN_PLAINTEXT) + print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") main() print("Parsing finished successfully") From 662134fbf7b7bfd53a358f40d43c5a329fd5bab8 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 16:25:34 +0200 Subject: [PATCH 081/152] small fix to macros --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index e15a76318c4..0f7345e8149 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -96,7 +96,7 @@ def check_for_title(line, in_code_block, curr_dirs): """ # detect titles match = re.match(r'^#+ ', line) - if match and len(match.group(0)) <= 5 and not in_code_block: + if match and len(match.group(0)) <= MAX_TITLE_DEPTH + 1 and not in_code_block: title_length = len(match.group(0)) - 1 if DEEP_DIRECTORIES: curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 1:-1].replace(' ', 
'-'))) From 05eab4ae23dc86f45b6eccbef36e31e8869a30c9 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 16:46:30 +0200 Subject: [PATCH 082/152] clean up test for valid title --- scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py index fc704c84b31..225c368477d 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py @@ -1,5 +1,4 @@ import pytest -import shutil from chatbot_parser import make_valid_title From b85a8fba96a7a5bd02dccef7a7f3cae34420f9b1 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 16:55:33 +0200 Subject: [PATCH 083/152] add a test for write_metadata --- .../tests/test_write_metadata.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py new file mode 100644 index 00000000000..68f1772cb24 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py @@ -0,0 +1,15 @@ +import pytest +import os +from chatbot_parser import write_metadata + + +@pytest.mark.parametrize("main_title,subtitle,links,title_level,directory,output", [ + ("", "", [], 1, "", {"main_title": "", "subtitle": "", "title_depth": 1, "directory": "", "parent_title": ""}), + ("A_very_good_main_title", "An_extremely_good_subtitle", ["the_first.link", "the_second.link"], 2, + os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), + {"main_title": "A_very_good_main_title", "subtitle": "An_extremely_good_subtitle", "title_depth": 2, + "directory": os.path.join("A_very_good_main_title", "An_awesome_parent_file", 
"An_extremely_good_subtitle"), + "parent_title": "An_awesome_parent_file", "links": {"0": "the_first.link", "1": "the_second.link"}}) +]) +def test_write_metadata(main_title, subtitle, links, title_level, directory, output): + assert write_metadata(main_title, subtitle, links, title_level, directory) == output From 39a3c99f68464b1614da8bbaaa68adac0aeea889 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 23 Aug 2024 11:41:20 +0200 Subject: [PATCH 084/152] added functionality to split on paragraphs --- .../chatbot_parser.py | 353 +++++++++--------- 1 file changed, 180 insertions(+), 173 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 0f7345e8149..9b6fced3636 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -10,7 +10,13 @@ from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template #################### define macro's #################### -# customizable macros (default values are defined at the bottom of the script) +# customizable macros (customization made possible at the bottom of the script) +SPLIT_ON_TITLES = True +MIN_PARAGRAPH_LENGTH = 160 +MAX_TITLE_DEPTH = 4 +INCLUDE_LINKS_IN_PLAINTEXT = False +SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES +DEEP_DIRECTORIES = True and SPLIT_ON_TITLES # Should always be False if SPLIT_ON_TITLES is False # directories PARSED_MDS = "parsed_mds" @@ -31,6 +37,7 @@ WINDOWS = "windows" MACOS = "macos" GENERIC = "generic" +LINK_OS = {LINUX: "Linux", WINDOWS: "Windows", MACOS: "macOS"} # OS needs different capitalisation for use in links # urls REPO_URL = 'https://github.com/hpcugent/vsc_user_docs' @@ -82,6 +89,11 @@ ENDIF: r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})' } +# filenames (and parts of filenames) +TEMP_JINJA_FILE = "jinja_file.txt" +_PARAGRAPH_ = "_paragraph_" +METADATA_EXTENSION = "_metadata" + 
################### define functions ################### @@ -211,12 +223,14 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): return curr_line, linklist -def split_text(file, main_title): +def split_text(file, main_title, current_paragraph_number=-1, OS=GENERIC): """ Function that splits the text into smaller sections and makes them into two dictionaries containing text and metadata :param file: the filepath of the file to be split :param main_title: the main title of the file + :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph on paragraph level + :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph on paragraph level :return paragraphs_text: dictionary containing the split sections of text :return paragraphs_metadata: dictionary containing the metadata of each split section of text :return subtitle_order: list containing all encountered subtitles in order of appearance @@ -225,7 +239,7 @@ def split_text(file, main_title): if SPLIT_ON_TITLES: return split_on_titles(file, main_title) elif SPLIT_ON_PARAGRAPHS: - return split_on_paragraphs(file, main_title) + return split_on_paragraphs(file, main_title, current_paragraph_number, OS) def split_on_titles(file, main_title): @@ -258,12 +272,6 @@ def split_on_titles(file, main_title): # list to keep track of the order of the subtitles subtitle_order = [] - # variable to keep track of the title level - title_level = 0 - - # variable to allow for if statements to "continue" over multiple paragraphs - open_ifs = "" - # variable to keep track of how many if-statements deep the current line is in_if_statement = 0 @@ -309,7 +317,7 @@ def split_on_titles(file, main_title): title = make_valid_title(line[title_level + 1:-1]) # create an entry for the file in the paragraphs text dictionary - current_paragraph = open_ifs + current_paragraph = "" after_first_title = True 
subtitle_order.append(title) @@ -345,42 +353,46 @@ def split_on_titles(file, main_title): return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order -def split_on_paragraphs(file, main_title): +def split_on_paragraphs(file, main_title, current_paragraph_number=-1, OS=GENERIC): """ Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata :param file: the filepath of the file to be split :param main_title: the main title of the file + :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph + :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph :return paragraphs_text: dictionary containing the split sections of text :return paragraphs_metadata: dictionary containing the metadata of each split section of text :return subtitle_order: list containing all encountered subtitles in order of appearance """ - # start of assuming we haven't encountered a title and the first paragraph hasn't appeared yet - after_first_title = False - - # first paragraph number - paragraph_number = 1 - # start of assuming we are not in a code_block in_code_block = False # define initial dictionaries - paragraphs_text = {} + paragraphs_os_free_text = {} + paragraphs_os_text = {} paragraphs_metadata = {} + # variable to keep track of the current paragraph + current_paragraph = "" + # list to keep track of links in the text link_list = [] # list to keep track of the order of the subtitles subtitle_order = [] - # variable to keep track of the title level - title_level = 0 + # variable to keep track of how many if-statements deep the current line is + in_if_statement = 0 + + # variable to indicate that previous section was one with if-statements + previous_contained_if = False + + # paragraph number to add to title + paragraph_number = 1 - # initialise the 
first paragraph - title = main_title + "_paragraph_" + str(paragraph_number) - paragraphs_text[title] = "" - subtitle_order.append(title) + # metadata title + metadata_title = main_title # list to keep track of most recent directories on each title level if LINUX_TUTORIAL not in file: @@ -392,49 +404,85 @@ def split_on_paragraphs(file, main_title): for line in readfile: - # keep track of title level and directory to write to metadata upon discovering a new subtitle - if title_level > 0: - last_title_level = title_level - last_dir = curr_dirs[last_title_level] + # detect if-statements starting or ending on the current line + in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len( + re.findall(IF_MANGLED_PATTERNS[ENDIF], line)) + + # only split up if current line is in a fully non-os-specific section + if in_if_statement == 0: + + title_level = check_for_title(line, in_code_block, curr_dirs) + + # detect codeblocks to make sure titles aren't detected in them + if '```' in line or (('
' in line) ^ ('
' in line)): + in_code_block = not in_code_block - title_level = check_for_title(line, in_code_block, curr_dirs) + # check whether a new paragraph should be started + if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= MIN_PARAGRAPH_LENGTH and not in_code_block: - # detect codeblocks to make sure titles and beginnings of paragraphs aren't detected in them - if '```' in line or (('
' in line) ^ ('
' in line)): - in_code_block = not in_code_block + # create a title for the previous paragraph + if current_paragraph_number == -1: + paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number) + else: + paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number) + paragraph_number += 1 - # line is a title with a maximum depth of 4 - if title_level > 0: - paragraphs_text[title] += line[title_level + 1:] + # write text of previous file + if previous_contained_if: + paragraphs_os_text[paragraph_title] = current_paragraph + else: + paragraphs_os_free_text[paragraph_title] = current_paragraph - elif line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", paragraphs_text[title])) >= MIN_PARAGRAPH_LENGTH: - # finish the previous file - paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title]) - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) + # write metadata of previous file + paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir) + subtitle_order.append(paragraph_title) - # start a new file - paragraph_number += 1 - title = make_valid_title(main_title + "_paragraph_" + str(paragraph_number)) - subtitle_order.append(title) + # reset the current paragraph + current_paragraph = "" - # create an entry for the next file in the paragraphs text dictionary - paragraphs_text[title] = open_ifs + # reset link_list + link_list = [] - # reset link_list - link_list = [] + previous_contained_if = False - # line is not a title or the ending of a sufficiently large paragraph + # line is a title with a maximum depth of 4 + elif title_level > 0: + + # make a new title + metadata_title = make_valid_title(line[title_level + 1:-1]) + + line, link_list = replace_markdown_markers(line[title_level + 1:], link_list, in_code_block, main_title) + current_paragraph += 
line + + # line is not a title or the beginning of a new paragraph + elif line != "\n" or previous_contained_if: + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) + current_paragraph += line + + # keep track of title level and directory to write to metadata upon discovering a new subtitle + if title_level > 0: + last_title_level = title_level + last_dir = curr_dirs[last_title_level] else: + previous_contained_if = True line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) - if title in paragraphs_text.keys() and line != "\n": - paragraphs_text[title] += line - elif line != "\n": - paragraphs_text[title] = line + current_paragraph += line - # write metadata for the last file - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, title_level, curr_dirs[last_title_level]) + # create a title for the last paragraph + if current_paragraph_number == -1: + paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number) + else: + paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number) - return paragraphs_text, paragraphs_metadata, subtitle_order + # write dictionaries for the last file + if previous_contained_if: + paragraphs_os_text[paragraph_title] = current_paragraph + else: + paragraphs_os_free_text[paragraph_title] = current_paragraph + paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level]) + subtitle_order.append(paragraph_title) + + return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order def write_metadata(main_title, subtitle, links, title_level, directory): @@ -461,63 +509,6 @@ def write_metadata(main_title, subtitle, links, title_level, directory): return paragraph_metadata -def close_ifs(text): - """ - Function to check whether all if-statements in a section are closed properly. 
If that is not the case, the function - closes all if-statements at the end of the section and returns a prefix for the next section containing all if-statements - of the section it is processing. This needs to be done because the start of the next section would also be contained within the - last unclosed if-statement of its previous section. - - :param text: the text of the section it checks - :return text: the adapted text where all if-statements are closed - :return prefix: the prefix for the next section - """ - - if_count = len(re.findall(IF_MANGLED_PATTERNS[IF], text.replace("\n", ""))) - endif_count = len(re.findall(IF_MANGLED_PATTERNS[ENDIF], text.replace("\n", ""))) - if IF_MANGLED_PART not in text or if_count == endif_count: - return text, "" - else: - - # Find all matches for each pattern - matches = [] - for key, pattern in IF_MANGLED_PATTERNS.items(): - for match in re.finditer(pattern, text): - matches.append(match) - - # sort the matches according to their start index - matches.sort(key=lambda x: x.start()) - - # extract the strings from the matches - open_ifs = [] - for match in matches: - open_ifs.append(match.group(0)) - - # only include the non-closed if-statements - changed = True - while changed: - changed = False - last_if = -1 - last_else = -1 - for i, if_part in enumerate(open_ifs): - if re.search(IF_MANGLED_PATTERNS[IF], if_part): - last_if = i - elif re.search(IF_MANGLED_PATTERNS[ELSE], if_part): - last_else = i - elif re.search(IF_MANGLED_PATTERNS[ENDIF], if_part): - changed = True - del open_ifs[i] - if last_else > last_if: - del open_ifs[last_else] - del open_ifs[last_if] - break - - # Concatenate all matches into a single string - open_ifs = ''.join(open_ifs) - - return text + (r'{' + IF_MANGLED_PART + '% endif %' + IF_MANGLED_PART + '}')*(if_count - endif_count), open_ifs - - def jinja_parser(filename, copy_location): """ function that let's jinja do its thing to format the files except for the os-related if-statements @@ -596,7 
+587,7 @@ def mangle_os_ifs(line, is_os): constr_match = re.search(r'\{%.*?%}', match.string) if_match = re.search(r'if ', match.group(1)) - if_os_match = re.search(r'if OS ', match.group(1)) + if_os_match = re.search(r'if OS', match.group(1)) endif_match = re.search(r'endif', match.group(1)) else_match = re.search(r'else', match.group(1)) @@ -726,7 +717,6 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe metadata = copy.deepcopy(paragraphs_metadata[title]) - file_title = paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS]) file_title = title # write text file @@ -757,10 +747,10 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe if OS == GENERIC: os_part = "" else: - os_part = OS + "/" - metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-') + os_part = LINK_OS[OS] + "/" + metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in paragraphs_metadata[title][SUBTITLE] if char.isalnum() or char == '-').strip('-') - with open(os.path.join(filepath, file_title + "_metadata.json"), 'w') as writefile: + with open(os.path.join(filepath, file_title + METADATA_EXTENSION + ".json"), 'w') as writefile: json.dump(metadata, writefile, indent=4) paragraph_numbers[OS] += 1 @@ -804,8 +794,6 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or :param all_metadata: all metadata generated by the splitter :return: """ - # add first subtitle in front of section again - text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + text # Unmangle if's to use jinja parser text = re.sub(IF_MANGLED_PART, "", text) @@ -819,52 +807,74 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or 
template = Template(text) jinja_text = template.render(OS=OS) - # re-adjust text to correct overcorrections - jinja_text = re.sub('"' + OS + '"', OS, jinja_text) - - with open("jinja_file.txt", 'w') as writefile: - writefile.write(jinja_text) - - # split in right way - _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text("jinja_file.txt", metadata[MAIN_TITLE]) - - # prepare variables to fix metadata - total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:] - all_metadata.update(os_specific_metadata) - - # write to files - for os_i, os_subtitle in enumerate(os_subtitle_order): - # check that file actually has some content - if len(os_specific_text[os_subtitle]) > 0: - # add the links to the metadata - if LINKS in metadata.keys(): - os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS] - - # fix parent in the metadata - parent_i = 0 - parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1 - parent = os_specific_metadata[os_subtitle][MAIN_TITLE] - while total_subtitle_order[parent_i] != os_subtitle and parent_i != len(total_subtitle_order): - if all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth: - parent = total_subtitle_order[parent_i] - parent_i += 1 - os_specific_metadata[os_subtitle][PARENT_TITLE] = parent - - # fix directory in the metadata - if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]: - os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE]) - else: - os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE]) + # add first subtitle in front of section again + if len(jinja_text) != 0: + jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + jinja_text - # make a directory to save the files - filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, 
os_specific_metadata[os_subtitle][DIRECTORY]) - os.makedirs(filepath, exist_ok=True) + # re-adjust text to correct overcorrections + jinja_text = re.sub('"' + OS + '"', OS, jinja_text) + + if LINUX_TUTORIAL not in metadata[DIRECTORY]: + with open(TEMP_JINJA_FILE, 'w') as writefile: + writefile.write(jinja_text) + + # split in right way + _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) - # write to files - write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, paragraph_numbers) else: - # don't write empty files - pass + os.makedirs(LINUX_TUTORIAL, exist_ok=True) + with open(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), 'w') as writefile: + writefile.write(jinja_text) + + # split in right way + _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), metadata[MAIN_TITLE], current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) + + # prepare variables to fix metadata + total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:] + all_metadata.update(os_specific_metadata) + + # write to files + for os_i, os_subtitle in enumerate(os_subtitle_order): + # check that file actually has some content + if len(os_specific_text[os_subtitle]) > 0: + # add the links to the metadata + if LINKS in metadata.keys(): + os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS] + + # fix parent in the metadata + parent_i = 0 + parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1 + parent = os_specific_metadata[os_subtitle][MAIN_TITLE] + + while total_subtitle_order[parent_i] != os_subtitle and parent_i != len(total_subtitle_order): + if all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] 
== parent_depth: + parent = total_subtitle_order[parent_i] + parent_i += 1 + + if SPLIT_ON_PARAGRAPHS and parent != os_specific_metadata[os_subtitle][MAIN_TITLE]: + os_specific_metadata[os_subtitle][PARENT_TITLE] = all_metadata[parent][SUBTITLE] + else: + os_specific_metadata[os_subtitle][PARENT_TITLE] = parent + + # fix directory in the metadata if needed + if DEEP_DIRECTORIES: + if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]: + os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE]) + else: + os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE]) + + # make a directory to save the files + filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) + os.makedirs(filepath, exist_ok=True) + + # write to files + write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, paragraph_numbers) + else: + # don't write empty files + pass + else: + # don't split empty texts + pass def main(): @@ -913,7 +923,7 @@ def main(): # filenames_linux = {} # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" # filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md" - # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" + # filenames_linux["common_pitfalls.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/common_pitfalls.md" # for loops over all files for filenames in [filenames_generic, filenames_linux]: @@ -977,7 +987,9 @@ def main(): # clean up temporary directories and files shutil.rmtree(COPIES, ignore_errors=True) 
shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) - os.remove("jinja_file.txt") + shutil.rmtree(LINUX_TUTORIAL, ignore_errors=True) + if os.path.exists(TEMP_JINJA_FILE): + os.remove(TEMP_JINJA_FILE) ################### run the script ################### @@ -987,24 +999,19 @@ def main(): # adding command-line options parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)") - parser.add_argument("-pl", "--paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)") - parser.add_argument("-td", "--title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)") + parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)") + parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)") parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") args = parser.parse_args() SPLIT_ON_TITLES = bool(args.split_on_titles) - MIN_PARAGRAPH_LENGTH = args.paragraph_length - MAX_TITLE_DEPTH = args.title_depth + MIN_PARAGRAPH_LENGTH = args.min_paragraph_length + MAX_TITLE_DEPTH = args.max_title_depth INCLUDE_LINKS_IN_PLAINTEXT = args.links SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES DEEP_DIRECTORIES = True and SPLIT_ON_TITLES # Should always be False if SPLIT_ON_TITLES is False - print(SPLIT_ON_TITLES) - print(MIN_PARAGRAPH_LENGTH) - print(MAX_TITLE_DEPTH) - print(INCLUDE_LINKS_IN_PLAINTEXT) - print("WARNING: This 
script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") main() print("Parsing finished successfully") From af9e6cca6ead2ded6ac54500e7e84ed26939aa12 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 23 Aug 2024 12:02:22 +0200 Subject: [PATCH 085/152] clean up --- .../chatbot_parser.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 9b6fced3636..ca861b86e81 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -676,7 +676,7 @@ def make_valid_title(title): return valid_filename -def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers): +def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number): """ Function that writes text and metadata of a generic (non-os-specific) file @@ -685,7 +685,6 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text :param title_order: list containing all subtitles in order :param title_order_number: order number of the title of the section that is being written - :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS :return: """ @@ -694,13 +693,13 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) os.makedirs(filepath, exist_ok=True) - write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC, 
paragraph_numbers=paragraph_numbers) + write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC) else: # don't write empty files pass -def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, paragraph_numbers): +def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS): """ Function to write files to a certain filepath @@ -711,7 +710,6 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe :param title_order_number: order number of the title of the section that is being written :param filepath: filepath to write files to :param OS: OS to be included in the metadata - :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS :return: """ @@ -728,18 +726,22 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe writefile.write(text) # write metadata + # add previous subtitle if title_order_number != 0: metadata[PREVIOUS_SUBTITLE] = title_order[title_order_number - 1] else: metadata[PREVIOUS_SUBTITLE] = None + # add next subtitle if title_order_number != len(title_order) - 1: metadata[NEXT_SUBTITLE] = title_order[title_order_number + 1] else: metadata[NEXT_SUBTITLE] = None + # add OS metadata[METADATA_OS] = OS + # add reference link if bool(LINUX_TUTORIAL in paragraphs_metadata[title][DIRECTORY]): linux_part = LINUX_TUTORIAL + "/" else: @@ -750,11 +752,10 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe os_part = LINK_OS[OS] + "/" metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in paragraphs_metadata[title][SUBTITLE] if char.isalnum() or char == '-').strip('-') + # write metadata to file with open(os.path.join(filepath, file_title + METADATA_EXTENSION + ".json"), 'w') as writefile: 
json.dump(metadata, writefile, indent=4) - paragraph_numbers[OS] += 1 - def insert_links(text, links): """ @@ -782,7 +783,7 @@ def insert_links(text, links): return text, new_links -def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, paragraph_numbers, all_metadata): +def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata): """ Function that splits os-specific sections into subtitles, parses them using jinja and writes them away @@ -790,7 +791,6 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or :param metadata: metadata generated for the full os specific section :param subtitle_order: order of the subtitles generated by the splitter :param title_order_number: order number of the section - :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS :param all_metadata: all metadata generated by the splitter :return: """ @@ -868,7 +868,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or os.makedirs(filepath, exist_ok=True) # write to files - write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, paragraph_numbers) + write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS) else: # don't write empty files pass @@ -978,11 +978,11 @@ def main(): # generic if subtitle in paragraphs_os_free_text.keys(): - write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers) + write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i) # os-specific else: - split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers, paragraphs_metadata) + 
split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers) # clean up temporary directories and files shutil.rmtree(COPIES, ignore_errors=True) From f4163a7d3cb94ab4962f7c24d8a78906064d59a6 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 23 Aug 2024 12:04:32 +0200 Subject: [PATCH 086/152] clean up --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index ca861b86e81..6be841ae2e5 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -994,7 +994,7 @@ def main(): ################### run the script ################### if __name__ == '__main__': - parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot") + parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n") # adding command-line options From 833f96488dacf631782afe14081202021f50e9f0 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 23 Aug 2024 12:12:48 +0200 Subject: [PATCH 087/152] further clean up and added shebang --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 6be841ae2e5..76627065004 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + import argparse import copy import json @@ -958,9 +960,6 @@ def main(): # variable that keeps track of the directories that are used to write in at different levels curr_dirs = [filename[:-3] for _ in range(5)] - # dictionary that keeps track of the paragraph numbers - paragraph_numbers = {GENERIC: 1, LINUX: 1, 
WINDOWS: 1, MACOS: 1} - ################### actually parse the md file ################### # create directories for the source markdown file @@ -982,7 +981,7 @@ def main(): # os-specific else: - split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers) + split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata) # clean up temporary directories and files shutil.rmtree(COPIES, ignore_errors=True) From 79b1a56d5a4742c5e96663f1b4c0b41fba68728d Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 23 Aug 2024 12:13:50 +0200 Subject: [PATCH 088/152] clean up --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 76627065004..c7dbe205737 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -1010,7 +1010,7 @@ def main(): INCLUDE_LINKS_IN_PLAINTEXT = args.links SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES DEEP_DIRECTORIES = True and SPLIT_ON_TITLES # Should always be False if SPLIT_ON_TITLES is False - - print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") + if DEEP_DIRECTORIES: + print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") main() print("Parsing finished successfully") From cec154c64ac9cf4bb34cc9e1ccd9f3d96ea656e7 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 23 Aug 2024 13:54:56 +0200 Subject: [PATCH 089/152] added test for if mangler --- .../tests/example_files/example_text_1.md | 31 +++++++++++ .../if_mangler_1_input.md | 4 ++ .../if_mangler_1_output.md | 4 ++ .../if_mangler_2_input.md | 7 +++ .../if_mangler_2_output.md | 7 +++ .../if_mangler_3_input.md | 6 ++ .../if_mangler_3_output.md | 6 ++ .../if_mangler_4_input.md | 4 ++ .../if_mangler_4_output.md | 4 ++ .../if_mangler_5_input.md | 11 ++++ .../if_mangler_5_output.md | 11 ++++ .../if_mangler_6_input.md | 8 +++ .../if_mangler_6_output.md | 8 +++ .../if_mangler_7_input.md | 9 +++ .../if_mangler_7_output.md | 9 +++ .../if_mangler_test_files/if_mangler_input.md | 55 +++++++++++++++++++ .../if_mangler_output.md | 55 +++++++++++++++++++ .../tests/test_if_mangler.py | 32 +++++++++++ 18 files changed, 271 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md create mode 100644 
scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md new file mode 100644 index 00000000000..9b810c3f41a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md @@ -0,0 +1,31 @@ +# Main title + +## Subtitle 1 + +blablabla +blablablabla + +### Subtitle 2 partly generic + +blablabla generic +blablabla generic +{% if OS == windows %}blablabla windows +blablabla windows with a [link](windows.md) + +#### Subtitle 3 Windows specific + +blablabla windows +blablablabla windows +{% else %}blablabla Linux macOS +blablablabla Linux macOS with a [link](linuxmacos.md) + +#### Subtitle 4 Linux and macOS specific + +blablabla Linux macOS 
+blablablabla Linux macOS +{% endif %} +blablabla generic with a [link](generic.md) + +## Subtitle 5 generic + +blablabla \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md new file mode 100644 index 00000000000..6a74b3c0181 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md @@ -0,0 +1,4 @@ +test1: OS_IF +{% if OS == windows %} +test1 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md new file mode 100644 index 00000000000..2f9cdc38294 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md @@ -0,0 +1,4 @@ +test1: OS_IF +{-if-% if OS == windows %-if-} +test1 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md new file mode 100644 index 00000000000..360a4a59ba3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md @@ -0,0 +1,7 @@ +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{% if OS == windows %} +test2 +{% endif %} +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md new file mode 100644 index 00000000000..798dcf6db24 --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md @@ -0,0 +1,7 @@ +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{-if-% if OS == windows %-if-} +test2 +{-if-% endif %-if-} +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md new file mode 100644 index 00000000000..d93125a5971 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md @@ -0,0 +1,6 @@ +test3: OS_IF with else +{% if OS == linux %} +test3 +{% else %} +test3 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md new file mode 100644 index 00000000000..02141961338 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md @@ -0,0 +1,6 @@ +test3: OS_IF with else +{-if-% if OS == linux %-if-} +test3 +{-if-% else %-if-} +test3 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md new file mode 100644 index 00000000000..cc15fae1df1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md @@ -0,0 +1,4 @@ +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md 
b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md new file mode 100644 index 00000000000..cc15fae1df1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md @@ -0,0 +1,4 @@ +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md new file mode 100644 index 00000000000..bdb288474e2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md @@ -0,0 +1,11 @@ +test5: OS_IF in OS_IF +{% if OS == windows %} +test5 +{% else %} +{% if OS == linux %} +test5 +{% else %} +test5 +{% endif %} +test5 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md new file mode 100644 index 00000000000..10443eb67a4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md @@ -0,0 +1,11 @@ +test5: OS_IF in OS_IF +{-if-% if OS == windows %-if-} +test5 +{-if-% else %-if-} +{-if-% if OS == linux %-if-} +test5 +{-if-% else %-if-} +test5 +{-if-% endif %-if-} +test5 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md new file mode 100644 index 00000000000..0731ee3588c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md @@ -0,0 +1,8 @@ +test6: NON_OS_IF in OS_IF +{% 
if OS == macos %} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md new file mode 100644 index 00000000000..cd37117cb00 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md @@ -0,0 +1,8 @@ +test6: NON_OS_IF in OS_IF +{-if-% if OS == macos %-if-} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md new file mode 100644 index 00000000000..6a72a338527 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md @@ -0,0 +1,9 @@ +test7: weird spacing and dashes + {%if OS == windows %} + test7 +{%- else%} + test7 + {% if OS == linux%} +test7 + {%-endif %} +{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md new file mode 100644 index 00000000000..dfe342ebfb1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md @@ -0,0 +1,9 @@ +test7: weird spacing and dashes + {-if-%if OS == windows %-if-} + test7 +{-if-%- else%-if-} + test7 + {-if-% if OS == linux%-if-} +test7 + {-if-%-endif %-if-} +{-if-%endif%-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md 
b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md new file mode 100644 index 00000000000..fb8c1f8b539 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md @@ -0,0 +1,55 @@ +test1: OS_IF +{% if OS == windows %} +test1 +{% endif %} + +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{% if OS == windows %} +test2 +{% endif %} +{% endif %} + +test3: OS_IF with else +{% if OS == linux %} +test3 +{% else %} +test3 +{% endif %} + +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } + +test5: OS_IF in OS_IF +{% if OS == windows %} +test5 +{% else %} +{% if OS == linux %} +test5 +{% else %} +test5 +{% endif %} +test5 +{% endif %} + +test6: NON_OS_IF in OS_IF +{% if OS == macos %} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{% endif %} + +test7: weird spacing and dashes + {%if OS == windows %} + test7 +{%- else%} + test7 + {% if OS == linux%} +test7 + {%-endif %} +{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md new file mode 100644 index 00000000000..796e94348fa --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md @@ -0,0 +1,55 @@ +test1: OS_IF +{-if-% if OS == windows %-if-} +test1 +{-if-% endif %-if-} + +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{-if-% if OS == windows %-if-} +test2 +{-if-% endif %-if-} +{% endif %} + +test3: OS_IF with else +{-if-% if OS == linux %-if-} +test3 +{-if-% else %-if-} +test3 +{-if-% endif %-if-} + +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } + +test5: OS_IF in OS_IF +{-if-% if OS == windows %-if-} +test5 +{-if-% else %-if-} +{-if-% if OS == linux %-if-} +test5 +{-if-% else %-if-} +test5 +{-if-% endif 
%-if-} +test5 +{-if-% endif %-if-} + +test6: NON_OS_IF in OS_IF +{-if-% if OS == macos %-if-} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{-if-% endif %-if-} + +test7: weird spacing and dashes + {-if-%if OS == windows %-if-} + test7 +{-if-%- else%-if-} + test7 + {-if-% if OS == linux%-if-} +test7 + {-if-%-endif %-if-} +{-if-%endif%-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py new file mode 100644 index 00000000000..17053fe705c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py @@ -0,0 +1,32 @@ +import pytest +import os +import shutil +from chatbot_parser import mangle_ifs + + +@pytest.mark.parametrize("input_file,output_file", [ + ("if_mangler_1_input.md", "if_mangler_1_output.md"), + ("if_mangler_2_input.md", "if_mangler_2_output.md"), + ("if_mangler_3_input.md", "if_mangler_3_output.md"), + ("if_mangler_4_input.md", "if_mangler_4_output.md"), + ("if_mangler_5_input.md", "if_mangler_5_output.md"), + ("if_mangler_6_input.md", "if_mangler_6_output.md"), + ("if_mangler_7_input.md", "if_mangler_7_output.md") +]) +def test_if_mangler(input_file, output_file): + # make directory + os.makedirs(os.path.join("if_mangled_files"), exist_ok=True) + + # make filepaths + input_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", input_file) + expected_output_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", output_file) + actual_output_file_path = os.path.join("if_mangled_files", input_file) + mangle_ifs(input_file_path, input_file) + + # check every line + with open(expected_output_file_path, "r") as expected_read_file: + with open(actual_output_file_path, "r") as actual_read_file: + assert all([expected_line == actual_line for expected_line, actual_line in zip(expected_read_file, actual_read_file)]) + + # remove directory + shutil.rmtree("if_mangled_files", 
ignore_errors=True) From 2f4a277677ea9f20e8bad455b0e583dd1bf5b028 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 23 Aug 2024 16:26:10 +0200 Subject: [PATCH 090/152] clean up --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index c7dbe205737..d91cd0df7d7 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -890,17 +890,9 @@ def main(): shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) # make the necessary directories - if not os.path.exists(COPIES): - os.mkdir(COPIES) - - if not os.path.exists(os.path.join(COPIES, LINUX_TUTORIAL)): - os.mkdir(os.path.join(COPIES, LINUX_TUTORIAL)) - - if not os.path.exists(PARSED_MDS): - os.mkdir(PARSED_MDS) - - if not os.path.exists(IF_MANGLED_FILES): - os.mkdir(IF_MANGLED_FILES) + for directory in [COPIES, os.path.join(COPIES, LINUX_TUTORIAL), PARSED_MDS, IF_MANGLED_FILES]: + if not os.path.exists(directory): + os.makedirs(directory) ################### define loop-invariant variables ################### From cd0c8ebad9ddc2ec25ef987d88d945215cdf5070 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 23 Aug 2024 17:20:20 +0200 Subject: [PATCH 091/152] clean up customizable options --- .../chatbot_parser.py | 132 ++++++++++-------- 1 file changed, 72 insertions(+), 60 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index d91cd0df7d7..c262f112759 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -12,13 +12,13 @@ from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template #################### define macro's #################### -# customizable macros (customization made possible at the bottom 
of the script) -SPLIT_ON_TITLES = True -MIN_PARAGRAPH_LENGTH = 160 -MAX_TITLE_DEPTH = 4 -INCLUDE_LINKS_IN_PLAINTEXT = False -SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES -DEEP_DIRECTORIES = True and SPLIT_ON_TITLES # Should always be False if SPLIT_ON_TITLES is False +# options +SPLIT_ON_TITLES = "SPLIT_ON_TITLES" +MIN_PARAGRAPH_LENGTH = "MIN_PARAGRAPH_LENGTH" +MAX_TITLE_DEPTH = "MAX_TITLE_DEPTH" +INCLUDE_LINKS_IN_PLAINTEXT = "INCLUDE_LINKS_IN_PLAINTEXT" +SPLIT_ON_PARAGRAPHS = "SPLIT_ON_PARAGRAPHS" +DEEP_DIRECTORIES = "DEEP_DIRECTORIES" # directories PARSED_MDS = "parsed_mds" @@ -99,24 +99,25 @@ ################### define functions ################### -def check_for_title(line, in_code_block, curr_dirs): +def check_for_title(line, in_code_block, curr_dirs, options): """ function that checks for titles in the current line. Used by split_text to split the text among the subtitles :param line: the current line to be checked for a title :param in_code_block: boolean indicating whether the current line is part of a codeblock to be sure comments aren't counted as titles :param curr_dirs: the current working directories for each level of subtitle, to be updated when a new title is found + :param options: dictionary containing the options given by the user :return title_length: The amount of hashtags in front of the title on the current line """ # detect titles match = re.match(r'^#+ ', line) - if match and len(match.group(0)) <= MAX_TITLE_DEPTH + 1 and not in_code_block: + if match and len(match.group(0)) <= options[MAX_TITLE_DEPTH] + 1 and not in_code_block: title_length = len(match.group(0)) - 1 - if DEEP_DIRECTORIES: + if options[DEEP_DIRECTORIES]: curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 1:-1].replace(' ', '-'))) # update the higher order current directories - for i in range(title_length + 1, MAX_TITLE_DEPTH + 1): + for i in range(title_length + 1, options[MAX_TITLE_DEPTH] + 1): curr_dirs[i] = 
curr_dirs[title_length] return title_length @@ -225,12 +226,13 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): return curr_line, linklist -def split_text(file, main_title, current_paragraph_number=-1, OS=GENERIC): +def split_text(file, main_title, options, current_paragraph_number=-1, OS=GENERIC): """ Function that splits the text into smaller sections and makes them into two dictionaries containing text and metadata :param file: the filepath of the file to be split :param main_title: the main title of the file + :param options: dictionary containing the options given by the user :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph on paragraph level :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph on paragraph level :return paragraphs_text: dictionary containing the split sections of text @@ -238,18 +240,19 @@ def split_text(file, main_title, current_paragraph_number=-1, OS=GENERIC): :return subtitle_order: list containing all encountered subtitles in order of appearance """ - if SPLIT_ON_TITLES: - return split_on_titles(file, main_title) - elif SPLIT_ON_PARAGRAPHS: - return split_on_paragraphs(file, main_title, current_paragraph_number, OS) + if options[SPLIT_ON_TITLES]: + return split_on_titles(file, main_title, options) + elif options[SPLIT_ON_PARAGRAPHS]: + return split_on_paragraphs(file, main_title, options, current_paragraph_number, OS) -def split_on_titles(file, main_title): +def split_on_titles(file, main_title, options): """ Function that splits the text into smaller sections based on the subtitle structure and makes them into two dictionaries containing text and metadata :param file: the filepath of the file to be split :param main_title: the main title of the file + :param options: dictionary containing the options given by the user :return paragraphs_text: dictionary containing the split 
sections of text :return paragraphs_metadata: dictionary containing the metadata of each split section of text :return subtitle_order: list containing all encountered subtitles in order of appearance @@ -282,9 +285,9 @@ def split_on_titles(file, main_title): # list to keep track of most recent directories on each title level if LINUX_TUTORIAL not in file: - curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)] + curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)] else: - curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(MAX_TITLE_DEPTH + 1)] + curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(options[MAX_TITLE_DEPTH] + 1)] with open(file, 'r') as readfile: @@ -296,7 +299,7 @@ def split_on_titles(file, main_title): # only split up if current line is in a fully non-os-specific section if in_if_statement == 0: - title_level = check_for_title(line, in_code_block, curr_dirs) + title_level = check_for_title(line, in_code_block, curr_dirs, options) # detect codeblocks to make sure titles aren't detected in them if '```' in line or (('
' in line) ^ ('
' in line)): @@ -355,12 +358,13 @@ def split_on_titles(file, main_title): return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order -def split_on_paragraphs(file, main_title, current_paragraph_number=-1, OS=GENERIC): +def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, OS=GENERIC): """ Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata :param file: the filepath of the file to be split :param main_title: the main title of the file + :param options: dictionary containing the options given by the user :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph :return paragraphs_text: dictionary containing the split sections of text @@ -398,9 +402,9 @@ def split_on_paragraphs(file, main_title, current_paragraph_number=-1, OS=GENERI # list to keep track of most recent directories on each title level if LINUX_TUTORIAL not in file: - curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)] + curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)] else: - curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(MAX_TITLE_DEPTH + 1)] + curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(options[MAX_TITLE_DEPTH] + 1)] with open(file, 'r') as readfile: @@ -413,14 +417,14 @@ def split_on_paragraphs(file, main_title, current_paragraph_number=-1, OS=GENERI # only split up if current line is in a fully non-os-specific section if in_if_statement == 0: - title_level = check_for_title(line, in_code_block, curr_dirs) + title_level = check_for_title(line, in_code_block, curr_dirs, options) # detect codeblocks to make sure titles aren't detected in them if '```' in line or (('
' in line) ^ ('
' in line)): in_code_block = not in_code_block # check whether a new paragraph should be started - if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= MIN_PARAGRAPH_LENGTH and not in_code_block: + if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= options[MIN_PARAGRAPH_LENGTH] and not in_code_block: # create a title for the previous paragraph if current_paragraph_number == -1: @@ -678,7 +682,7 @@ def make_valid_title(title): return valid_filename -def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number): +def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, options): """ Function that writes text and metadata of a generic (non-os-specific) file @@ -687,6 +691,7 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text :param title_order: list containing all subtitles in order :param title_order_number: order number of the title of the section that is being written + :param options: dictionary containing the options given by the user :return: """ @@ -695,13 +700,13 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) os.makedirs(filepath, exist_ok=True) - write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC) + write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options) else: # don't write empty files pass -def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS): +def write_files(title, text, paragraphs_metadata, title_order, title_order_number, 
filepath, OS, options): """ Function to write files to a certain filepath @@ -712,6 +717,7 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe :param title_order_number: order number of the title of the section that is being written :param filepath: filepath to write files to :param OS: OS to be included in the metadata + :param options: dictionary containing the options given by the user :return: """ @@ -722,7 +728,7 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe # write text file with open(os.path.join(filepath, file_title + ".txt"), 'w') as writefile: if LINKS in paragraphs_metadata[title].keys(): - adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS]) + adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS], options) writefile.write(adapted_text) else: writefile.write(text) @@ -759,12 +765,13 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe json.dump(metadata, writefile, indent=4) -def insert_links(text, links): +def insert_links(text, links, options): """ Function that inserts links in the plaintext or takes out the references to the links depending on the value of INCLUDE_LINKS_IN_PLAINTEXT :param text: The plaintext that needs to be adapted :param links: The links that might need to be inserted + :param options: dictionary containing the options given by the user :return text: The adapted plaintext :return links: The links that were actually present in the text """ @@ -773,7 +780,7 @@ def insert_links(text, links): new_links = {} for link_number in re.finditer(LINK_MARKER + r'([0-9]*?)' + LINK_MARKER, text): present_links.append(link_number.group(1)) - if INCLUDE_LINKS_IN_PLAINTEXT: + if options[INCLUDE_LINKS_IN_PLAINTEXT]: text = re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, " " + links[link_number.group(1)] + " ", text) else: text = re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, "", text) @@ -785,7 +792,7 @@ 
def insert_links(text, links): return text, new_links -def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata): +def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata, options): """ Function that splits os-specific sections into subtitles, parses them using jinja and writes them away @@ -794,6 +801,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or :param subtitle_order: order of the subtitles generated by the splitter :param title_order_number: order number of the section :param all_metadata: all metadata generated by the splitter + :param options: dictionary containing the options given by the user :return: """ @@ -821,7 +829,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or writefile.write(jinja_text) # split in right way - _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) + _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) else: os.makedirs(LINUX_TUTORIAL, exist_ok=True) @@ -829,7 +837,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or writefile.write(jinja_text) # split in right way - _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), metadata[MAIN_TITLE], current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) + _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), metadata[MAIN_TITLE], options, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) # prepare variables to 
fix metadata total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:] @@ -853,13 +861,13 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or parent = total_subtitle_order[parent_i] parent_i += 1 - if SPLIT_ON_PARAGRAPHS and parent != os_specific_metadata[os_subtitle][MAIN_TITLE]: + if options[SPLIT_ON_PARAGRAPHS] and parent != os_specific_metadata[os_subtitle][MAIN_TITLE]: os_specific_metadata[os_subtitle][PARENT_TITLE] = all_metadata[parent][SUBTITLE] else: os_specific_metadata[os_subtitle][PARENT_TITLE] = parent # fix directory in the metadata if needed - if DEEP_DIRECTORIES: + if options[DEEP_DIRECTORIES]: if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]: os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE]) else: @@ -870,7 +878,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or os.makedirs(filepath, exist_ok=True) # write to files - write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS) + write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options) else: # don't write empty files pass @@ -884,6 +892,28 @@ def main(): main function :return: """ + parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n") + + # adding command-line options + + parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)") + parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 
160)") + parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)") + parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") + parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is set to one") + + args = parser.parse_args() + + options = {SPLIT_ON_TITLES: bool(args.split_on_titles), + SPLIT_ON_PARAGRAPHS: not args.split_on_titles, + MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, + MAX_TITLE_DEPTH: args.max_title_depth, + INCLUDE_LINKS_IN_PLAINTEXT: args.links, + DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles} + + if options[DEEP_DIRECTORIES]: + print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") + # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason shutil.rmtree(PARSED_MDS, ignore_errors=True) shutil.rmtree(COPIES, ignore_errors=True) @@ -962,18 +992,18 @@ def main(): jinja_parser(filename, copy_file) # split the text in paragraphs - paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title) + paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options) # for every section, either make the whole section generic, or create an os-specific file for each OS for i, subtitle in enumerate(subtitle_order): # generic if subtitle in paragraphs_os_free_text.keys(): - write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i) + 
write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, options) # os-specific else: - split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata) + split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata, options) # clean up temporary directories and files shutil.rmtree(COPIES, ignore_errors=True) @@ -982,27 +1012,9 @@ def main(): if os.path.exists(TEMP_JINJA_FILE): os.remove(TEMP_JINJA_FILE) + print("Parsing finished successfully") + ################### run the script ################### if __name__ == '__main__': - parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n") - - # adding command-line options - - parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)") - parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)") - parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)") - parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") - - args = parser.parse_args() - - SPLIT_ON_TITLES = bool(args.split_on_titles) - MIN_PARAGRAPH_LENGTH = args.min_paragraph_length - MAX_TITLE_DEPTH = args.max_title_depth - INCLUDE_LINKS_IN_PLAINTEXT = args.links - SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES - DEEP_DIRECTORIES = True and SPLIT_ON_TITLES # Should always be False if SPLIT_ON_TITLES is False - if DEEP_DIRECTORIES: - print("WARNING: This script 
generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") main() - print("Parsing finished successfully") From 3be262a84c2a574239a554a08b1e760322b470ee Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 09:57:58 +0200 Subject: [PATCH 092/152] further adapt the script to be able to test it --- .../chatbot_parser.py | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index c262f112759..951fea42302 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -887,29 +887,11 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or pass -def main(): +def main(options): """ main function :return: """ - parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n") - - # adding command-line options - - parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)") - parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)") - parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)") - parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") - parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the 
structure of the subtitles. Only works if split on titles is set to one") - - args = parser.parse_args() - - options = {SPLIT_ON_TITLES: bool(args.split_on_titles), - SPLIT_ON_PARAGRAPHS: not args.split_on_titles, - MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, - MAX_TITLE_DEPTH: args.max_title_depth, - INCLUDE_LINKS_IN_PLAINTEXT: args.links, - DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles} if options[DEEP_DIRECTORIES]: print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") @@ -1017,4 +999,23 @@ def main(): ################### run the script ################### if __name__ == '__main__': - main() + parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n") + + # adding command-line options + + parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)") + parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)") + parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)") + parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") + parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. 
Only works if split on titles is set to one") + + args = parser.parse_args() + + options = {SPLIT_ON_TITLES: bool(args.split_on_titles), + SPLIT_ON_PARAGRAPHS: not args.split_on_titles, + MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, + MAX_TITLE_DEPTH: args.max_title_depth, + INCLUDE_LINKS_IN_PLAINTEXT: args.links, + DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles} + + main(options) From 1d32aab468c7d3698c69761c0783efd99196cdf1 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 10:11:32 +0200 Subject: [PATCH 093/152] make changes to usage in command line to be more intuitive --- .../chatbot_parser.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 951fea42302..e0741a9a347 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -1003,19 +1003,19 @@ def main(options): # adding command-line options - parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)") - parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)") - parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)") + parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.") + parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if 
split on titles is disabled (default: 160)") + parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)") parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") - parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is set to one") + parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is enabled") args = parser.parse_args() - options = {SPLIT_ON_TITLES: bool(args.split_on_titles), - SPLIT_ON_PARAGRAPHS: not args.split_on_titles, - MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, - MAX_TITLE_DEPTH: args.max_title_depth, - INCLUDE_LINKS_IN_PLAINTEXT: args.links, - DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles} - - main(options) + options_dict = {SPLIT_ON_TITLES: args.split_on_titles, + SPLIT_ON_PARAGRAPHS: not args.split_on_titles, + MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, + MAX_TITLE_DEPTH: args.max_title_depth, + INCLUDE_LINKS_IN_PLAINTEXT: args.links, + DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles} + + main(options_dict) From 5902c96c19985f4225a34f7d081e294482bedcce Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 10:37:51 +0200 Subject: [PATCH 094/152] first revised version of the README --- scripts/HPC_chatbot_preprocessor/README.md | 67 ++++++++++++++++------ 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 55996e0bef5..86bea6b9ed7 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -1,6 +1,44 @@ # 
Chatbot parser -`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory as input for a chatbot. +`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory as input for a chatbot. + +## Usage + +The script can be run in a shell environment with the following command: + +```shell +python chatbot_parser.py +``` + +This command has the following possible options: + +```shell +chatbot_parser.py [-h] [-st] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd] +``` + +### `h`/`help` + +Display the help message. + +### `st`/`split_on_titles` + +Including this option will split the source files based on the titles and subtitles in the markdown text. Not including this option will split the text on paragraphs with a certain minimum length. + +### `pl`/`min_paragraph_length` + +This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 160 characters. This option only works if `split_on_titles` is not enabled. + +### `td`/`max_title_depth` + +This option allows the user to configure the maximum "title depth" (the number of `#` characters in front of a title) to be used as borders between sections if `split_on_titles` is enabled. The default value is 4. + +### `l`/`links` + +Some of the sourcefiles might contain links. Including this option will retain the links in the plaintext. If this option is not included, the links will be dropped from the plaintext. + +### `dd`/`deep_directories` + +Including this option will make the script generate a "deep directory" where every title encountered will be made into a subdirectory of its parent title (so, for example, a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). This option only works if `split_on_titles` is enabled.
## Generated file structure @@ -11,22 +49,17 @@ The generated directory structure is written as a subdirectory of `parsed_mds`. Within `os_specific` a further distinction is made for each of the three possible operating systems included in the documentation. -These subdirectories then contain a subdirectory for each individual markdown sourcefile. In the file specific subdirectories, further divisions are made according to the titles and subtitles found in that markdown sourcefile. - -Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and at the end a reference link to the corresponding part of the documentation website on . +Both the generic and each of the three os-specific directories then contain a directory for each source file. -## Requirements +If the option `deep_directories` is not enabled, all paragraphs of the source file and their corresponding metadata will be saved in this directory. The (processed) plaintext of the paragraph is written to a `.txt` file and the metadata is written to a `.json` file. -- The required Python packages are listed in `requirements.txt` -- [Pandoc](https://pandoc.org/installing.html) must be installed and must be added to the system PATH +If the option `deep_directories` is enabled, the directory of each source file will contain a subdirectory structure corresponding to the structure of the subtitles at different levels in the source file. Each subtitle in the source file corresponds to a directory nested in the directory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). -## Usage +Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and a `.json` file with the metadata of that section. 
-The script can be ran in a shell environment with the following command: +## Requirements -```shell -python chatbot_parser.py -``` +- The required Python packages are listed in `requirements.txt` ## Restrictions on source-files @@ -102,13 +135,9 @@ endif This will also result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it. -### Allowed html syntax +### html syntax -The script contains a list of html syntax keywords it filters out. If more html syntax keywords are used in the future, it suffices to add them to this list to adapt the script to filter them out. The current list is: -``` -["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"] -``` -The script is also adapted to take into consideration structures like `
` and retain the link. +The input shouldn't contain any html syntax. While some failsafes are in place, the script isn't made with the use case of handling html syntax in mind. ### Markdown comments @@ -121,4 +150,4 @@ Any comments within the markdown files (for example TODO's) should follow the fo ### Long filepaths -Due to the nature of this script, it can generate large directories with very long names. Depending on the operating system, this can cause problems with filepaths being to long, resulting in files not being able to open. A possible fix for this is to make sure the filepath to the script is not too long. +Due to the nature of this script, it can generate large directories with very long names if `deep_directories` is enabled. Depending on the operating system, this can cause problems with filepaths being too long, resulting in files not being able to open. A possible fix for this is to make sure the filepath to where the script is located is not too long. Another solution is lowering the `max_title_depth` or disabling `deep_directories`. 
From 6e488005ed15345e34878c7a9ee7944d554a42ef Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 11:00:58 +0200 Subject: [PATCH 095/152] added docstring to main function --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index e0741a9a347..282e1607031 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -890,6 +890,14 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or def main(options): """ main function + + :param options: dictionary containing the options specified by the user to run the script: + {SPLIT_ON_TITLES: boolean indicating whether to split on titles, + SPLIT_ON_PARAGRAPHS: boolean indicating whether to split on paragraphs (should always be the opposite of SPLIT_ON_TITLES), + MIN_PARAGRAPH_LENGTH: integer representing the minimum length of a paragraph, + MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text, + INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext, + DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not} :return: """ From 0bc440bc71dc0cb2a01fc799db5566c112f0c481 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 11:22:23 +0200 Subject: [PATCH 096/152] include chatbot_prepprocessor --- scripts/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/README.md b/scripts/README.md index eed5a73e4d5..a88bd42cc46 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,3 +1,4 @@ Scripts that can be used to automatically generate markdown files, can be found here. 
-* [`available_software`](available_software): script to generate overview of available environment modules; \ No newline at end of file +* [`available_software`](available_software): script to generate overview of available environment modules; +* [`chatbot_preprocessor`](HPC_chatbot_preprocessor): script to generate input files for the chatbot; \ No newline at end of file From e6e6023c068b8c512af808cb40d4bd1dd68c1603 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 12:26:38 +0200 Subject: [PATCH 097/152] added options for source and destination directories --- scripts/HPC_chatbot_preprocessor/README.md | 10 +- .../chatbot_parser.py | 170 ++++++++---------- 2 files changed, 86 insertions(+), 94 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 86bea6b9ed7..82aaa9b7e3c 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -13,13 +13,21 @@ python chatbot_parser.py This command has the following possible options: ```shell -chatbot_parser.py [-h] [-st SPLIT_ON_TITLES] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd] +chatbot_parser.py [-h] -src SOURCE -dst DESTINATION [-st] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd] ``` ### `h`/`help` Display the help message +### `src`/`source` + +This is a required option that specifies the source directory of the input files for the script. This location is also used to look for jinja templates when using jinja to parse the source files (such as the `macros` directory within `vsc_user_docs/mkdocs/docs/HPC`). + +### `dst`/`destination` + +This is a required option that specifies where the output of the script should be written. The script also generates extra intermediate subdirectories, so subdirectories with the following names shouldn't be present in the destination directory: `parsed_mds`, `copies` and `if_mangled_files`. 
If any of these pose a problem, the name of the intermediate subdirectory used for the script can be changed in the macros at the top of the script. + ### `st`/`split_on_titles` Including this option will split the source files based on the titles and subtitles in the markdown text. Not including this option will split the text on paragraphs with a certain minimum length. diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 282e1607031..245c5d68f51 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -13,6 +13,8 @@ #################### define macro's #################### # options +SOURCE_DIRECTORY = "SOURCE_DIRECTORY" +DESTINATION_DIRECTORY = "DESTINATION_DIRECTORY" SPLIT_ON_TITLES = "SPLIT_ON_TITLES" MIN_PARAGRAPH_LENGTH = "MIN_PARAGRAPH_LENGTH" MAX_TITLE_DEPTH = "MAX_TITLE_DEPTH" @@ -284,10 +286,7 @@ def split_on_titles(file, main_title, options): previous_contained_if = False # list to keep track of most recent directories on each title level - if LINUX_TUTORIAL not in file: - curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)] - else: - curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(options[MAX_TITLE_DEPTH] + 1)] + curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)] with open(file, 'r') as readfile: @@ -515,12 +514,13 @@ def write_metadata(main_title, subtitle, links, title_level, directory): return paragraph_metadata -def jinja_parser(filename, copy_location): +def jinja_parser(filename, copy_location, options): """ function that let's jinja do its thing to format the files except for the os-related if-statements :param filename: the name of the file that needs to be formatted using jinja :param copy_location: the location of the file that needs to be formatted using jinja + :param options: dictionary containing the options given by the user :return: """ # YAML 
file location @@ -539,10 +539,10 @@ def jinja_parser(filename, copy_location): combined_context = {**words_dict, **additional_context} # Mangle the OS-related if-statements - mangle_ifs(copy_location, filename) + mangle_ifs(copy_location, filename, options) # Use Jinja2 to replace the macros - template_loader = ChoiceLoader([FileSystemLoader(searchpath=[IF_MANGLED_FILES, os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR)]), FunctionLoader(load_macros)]) + template_loader = ChoiceLoader([FileSystemLoader(searchpath=[os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), options[SOURCE_DIRECTORY], os.path.join(options[SOURCE_DIRECTORY], RETURN_DIR)]), FunctionLoader(load_macros)]) templateEnv = Environment(loader=template_loader) template = templateEnv.get_template(filename) rendered_content = template.render(combined_context) @@ -642,18 +642,19 @@ def mangle_os_ifs(line, is_os): return line, is_os -def mangle_ifs(directory, filename): +def mangle_ifs(directory, filename, options): """ function that writes the if-mangled version of a file to a location where the jinja parser will use it :param directory: the directory of the file to be if mangled :param filename: the filename of the file to be mangled + :param options: dictionary containing the options given by the user :return: """ # variable to keep track of latest if-statement scope is_os = NON_OS_IF - with open(os.path.join(IF_MANGLED_FILES, filename), 'w') as write_file: + with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES, filename), 'w') as write_file: with open(directory, 'r') as read_file: for line in read_file: new_line, is_os = mangle_os_ifs(line, is_os) @@ -682,7 +683,7 @@ def make_valid_title(title): return valid_filename -def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, options): +def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, options, 
is_linux_tutorial): """ Function that writes text and metadata of a generic (non-os-specific) file @@ -692,21 +693,22 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, :param title_order: list containing all subtitles in order :param title_order_number: order number of the title of the section that is being written :param options: dictionary containing the options given by the user + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial :return: """ if len(paragraphs_text[title]) > 0: # make the directory needed for the files that will be written - filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) + filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) os.makedirs(filepath, exist_ok=True) - write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options) + write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options, is_linux_tutorial) else: # don't write empty files pass -def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, options): +def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, options, is_linux_tutorial): """ Function to write files to a certain filepath @@ -718,6 +720,7 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe :param filepath: filepath to write files to :param OS: OS to be included in the metadata :param options: dictionary containing the options given by the user + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial :return: """ @@ -750,7 +753,7 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe metadata[METADATA_OS] = OS # add reference link 
- if bool(LINUX_TUTORIAL in paragraphs_metadata[title][DIRECTORY]): + if is_linux_tutorial: linux_part = LINUX_TUTORIAL + "/" else: linux_part = "" @@ -792,7 +795,7 @@ def insert_links(text, links, options): return text, new_links -def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata, options): +def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata, options, is_linux_tutorial): """ Function that splits os-specific sections into subtitles, parses them using jinja and writes them away @@ -802,6 +805,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or :param title_order_number: order number of the section :param all_metadata: all metadata generated by the splitter :param options: dictionary containing the options given by the user + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial :return: """ @@ -824,20 +828,11 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or # re-adjust text to correct overcorrections jinja_text = re.sub('"' + OS + '"', OS, jinja_text) - if LINUX_TUTORIAL not in metadata[DIRECTORY]: - with open(TEMP_JINJA_FILE, 'w') as writefile: - writefile.write(jinja_text) + with open(TEMP_JINJA_FILE, 'w') as writefile: + writefile.write(jinja_text) - # split in right way - _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) - - else: - os.makedirs(LINUX_TUTORIAL, exist_ok=True) - with open(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), 'w') as writefile: - writefile.write(jinja_text) - - # split in right way - _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), metadata[MAIN_TITLE], options, 
current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) + # split in right way + _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) # prepare variables to fix metadata total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:] @@ -874,11 +869,11 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE]) # make a directory to save the files - filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) + filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) os.makedirs(filepath, exist_ok=True) # write to files - write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options) + write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options, is_linux_tutorial) else: # don't write empty files pass @@ -905,32 +900,27 @@ def main(options): print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason - shutil.rmtree(PARSED_MDS, ignore_errors=True) - shutil.rmtree(COPIES, ignore_errors=True) - shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS), ignore_errors=True) + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True) + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True) # make the necessary directories - for directory in [COPIES, os.path.join(COPIES, LINUX_TUTORIAL), PARSED_MDS, IF_MANGLED_FILES]: + for directory in [COPIES, PARSED_MDS, IF_MANGLED_FILES]: + directory = os.path.join(options[DESTINATION_DIRECTORY], directory) if not os.path.exists(directory): os.makedirs(directory) ################### define loop-invariant variables ################### # constant that keeps track of the source directories - source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), - os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] + source_directory = options[SOURCE_DIRECTORY] # list of all the filenames - filenames_generic = {} - filenames_linux = {} - for source_directory in source_directories: - all_items = os.listdir(source_directory) - files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - for file in files: - if LINUX_TUTORIAL in source_directory: - filenames_linux[file] = os.path.join(source_directory, file) - else: - filenames_generic[file] = os.path.join(source_directory, file) + filenames = {} + all_items = os.listdir(source_directory) + files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] 
+ for file in files: + filenames[file] = os.path.join(source_directory, file) # # Temporary variables to test with just one singular file # filenames_generic = {} @@ -940,65 +930,55 @@ def main(options): # filenames_linux["common_pitfalls.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/common_pitfalls.md" # for loops over all files - for filenames in [filenames_generic, filenames_linux]: - for filename in filenames.keys(): - ################### define/reset loop specific variables ################### + for filename in filenames.keys(): + ################### define/reset loop specific variables ################### - # variable that keeps track of whether file is part of the linux tutorial - is_linux_tutorial = bool(LINUX_TUTORIAL in filenames[filename]) + # boolean indicating whether the current file is part of the linux tutorial + is_linux_tutorial = bool(LINUX_TUTORIAL in filenames[filename]) - # make a copy of the original file in order to make sure the original does not get altered - if is_linux_tutorial: - copy_file = os.path.join(COPIES, LINUX_TUTORIAL, filename) - else: - copy_file = os.path.join(COPIES, filename) - shutil.copyfile(filenames[filename], copy_file) - - # variable that keeps track of the directories that are used to write in at different levels - if is_linux_tutorial: - root_dir_generic = os.path.join(PARSED_MDS, GENERIC_DIR, LINUX_TUTORIAL) - root_dir_os_specific_linux = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, LINUX, LINUX_TUTORIAL) - root_dir_os_specific_windows = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS, LINUX_TUTORIAL) - root_dir_os_specific_macos = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, MACOS, LINUX_TUTORIAL) - else: - root_dir_generic = os.path.join(PARSED_MDS, GENERIC_DIR) - root_dir_os_specific_linux = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, LINUX) - root_dir_os_specific_windows = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS) - root_dir_os_specific_macos = os.path.join(PARSED_MDS, 
OS_SPECIFIC_DIR, MACOS) + # make a copy of the original file in order to make sure the original does not get altered + copy_file = os.path.join(options[DESTINATION_DIRECTORY], COPIES, filename) + shutil.copyfile(filenames[filename], copy_file) - # variable for the main title (needed for reference links) - main_title = filename[:-3] + # variable that keeps track of the directories that are used to write in at different levels + root_dir_generic = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR) + root_dir_os_specific_linux = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, LINUX) + root_dir_os_specific_windows = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS) + root_dir_os_specific_macos = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, MACOS) - # variable that keeps track of the directories that are used to write in at different levels - curr_dirs = [filename[:-3] for _ in range(5)] + # variable for the main title (needed for reference links) + main_title = filename[:-3] - ################### actually parse the md file ################### + # variable that keeps track of the directories that are used to write in at different levels + curr_dirs = [filename[:-3] for _ in range(5)] - # create directories for the source markdown file - for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: - os.makedirs(directory, exist_ok=True) + ################### actually parse the md file ################### - # process the jinja macros - jinja_parser(filename, copy_file) + # create directories for the source markdown file + for directory in 
[root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: + os.makedirs(os.path.join(options[DESTINATION_DIRECTORY], directory), exist_ok=True) - # split the text in paragraphs - paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options) + # process the jinja macros + jinja_parser(filename, copy_file, options) - # for every section, either make the whole section generic, or create an os-specific file for each OS - for i, subtitle in enumerate(subtitle_order): + # split the text in paragraphs + paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options) - # generic - if subtitle in paragraphs_os_free_text.keys(): - write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, options) + # for every section, either make the whole section generic, or create an os-specific file for each OS + for i, subtitle in enumerate(subtitle_order): - # os-specific - else: - split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata, options) + # generic + if subtitle in paragraphs_os_free_text.keys(): + write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, options, is_linux_tutorial) + + # os-specific + else: + split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata, options, is_linux_tutorial) # clean up temporary directories and files - shutil.rmtree(COPIES, ignore_errors=True) - shutil.rmtree(IF_MANGLED_FILES, 
ignore_errors=True) - shutil.rmtree(LINUX_TUTORIAL, ignore_errors=True) + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True) + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True) + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], LINUX_TUTORIAL), ignore_errors=True) if os.path.exists(TEMP_JINJA_FILE): os.remove(TEMP_JINJA_FILE) @@ -1011,6 +991,8 @@ def main(options): # adding command-line options + parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located") + parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to") parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.") parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is disabled (default: 160)") parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)") @@ -1019,7 +1001,9 @@ def main(options): args = parser.parse_args() - options_dict = {SPLIT_ON_TITLES: args.split_on_titles, + options_dict = {SOURCE_DIRECTORY: args.source, + DESTINATION_DIRECTORY: args.destination, + SPLIT_ON_TITLES: args.split_on_titles, SPLIT_ON_PARAGRAPHS: not args.split_on_titles, MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, MAX_TITLE_DEPTH: args.max_title_depth, From a6d99d9c724e453c9adb4262b747ddbf01ab711e Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 12:27:20 +0200 Subject: [PATCH 098/152] cleanup --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 7 ------- 1 file changed, 7 deletions(-) diff --git 
a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 245c5d68f51..bfc152cee60 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -922,13 +922,6 @@ def main(options): for file in files: filenames[file] = os.path.join(source_directory, file) - # # Temporary variables to test with just one singular file - # filenames_generic = {} - # filenames_linux = {} - # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" - # filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md" - # filenames_linux["common_pitfalls.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/common_pitfalls.md" - # for loops over all files for filename in filenames.keys(): ################### define/reset loop specific variables ################### From 2be834f19ce8729a0d28ef4b89ddeea59b5e398e Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 13:14:22 +0200 Subject: [PATCH 099/152] cleanup --- .../HPC_chatbot_preprocessor/chatbot_parser.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index bfc152cee60..26cf15b79a2 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -86,6 +86,9 @@ # link indicators LINK_MARKER = r'Ā§linkĀ§linkĀ§' +# HTML tags +HTML_TAGS = ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"] # make sure these are always lowercase + # regex patterns IF_MANGLED_PATTERNS = { IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})', @@ -164,9 +167,8 @@ def 
replace_markdown_markers(curr_line, linklist, in_code_block, main_title): match = re.findall(r'<(.*?)>', curr_line) if match: for i, content in enumerate(match): - syntax_words = ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"] # make sure these are always lowercase - syntax_words_variations = list(chain.from_iterable([[element, element + "/", "/" + element] for element in syntax_words])) - syntax_words_style = [element + " style=.*" for element in syntax_words] + html_tags_variations = list(chain.from_iterable([[element, element + "/", "/" + element] for element in HTML_TAGS])) + html_tags_style = [element + " style=.*" for element in HTML_TAGS] # add references for every link of format if re.search(r'a href=.*', content): @@ -175,11 +177,11 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): linklist.append(link) # drop the syntax words - elif content.lower() in syntax_words_variations: + elif content.lower() in html_tags_variations: curr_line = re.sub(f'<{content}>', "", curr_line) - # drop the version of the syntax_words followed by " style=" - elif any(re.match(pattern, content) for pattern in syntax_words_style): + # drop the version of the HTML_TAGS followed by " style=" + elif any(re.match(pattern, content) for pattern in html_tags_style): curr_line = re.sub(r'<.*?>', "", curr_line) # drop markdown comments @@ -983,7 +985,6 @@ def main(options): parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n") # adding command-line options - parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located") parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to") parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with 
a minimum length.") From 532543a18785e966a76a830c04055ec46425d20e Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 13:17:07 +0200 Subject: [PATCH 100/152] cleanup --- scripts/HPC_chatbot_preprocessor/README.md | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 82aaa9b7e3c..bc2922aaf5a 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -16,35 +16,37 @@ This command has the following possible options: chatbot_parser.py [-h] -src SOURCE -dst DESTINATION [-st] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd] ``` -### `h`/`help` +### Options + +#### `h`/`help` Display the help message -### `src`/`source` +#### `src`/`source` This is a required option that specifies the source directory of the input files for the script. This location is also used to look for jinja templates when using jinja to parse the source files (such as the `macros` directory within `vsc_user_docs/mkdocs/docs/HPC`). -### `dst`/`destination` +#### `dst`/`destination` This is a required option that specifies where the output of the script should be written. The script also generates extra intermediate subdirectories, so subdirectories with the following names shouldn't be present in the destination directory: `parsed_mds`, `copies` and `if_mangled_files`. If any of these pose a problem, the name of the intermediate subdirectory used for the script can be changed in the macros at the top of the script. -### `st`/`split_on_titles` +#### `st`/`split_on_titles` Including this option will split the source files based on the titles and subtitles in the markdown text. Not including this option will split the text on paragraphs with a certain minimum length. 
-### `pl`/`min_paragraph_length` +#### `pl`/`min_paragraph_length` This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 160 characters. This options only works if `split_on_titles` is not enabled. -### `td`/`max_title_depth` +#### `td`/`max_title_depth` This option allows the user to configure the maximum "title depth" (the amount of `#` in front) to be used as borders between sections if `split_on_titles` is enabled. The default value is 4. -### `l`/`links` +#### `l`/`links` Some of the sourcefiles might contain links. Including this option will retain the links in the plaintext. If this option is not included, the links will be dropped from the plaintext. -### `dd`/`deep_directories` +#### `dd`/`deep_directories` Including this option will make the script generate a "deep directory" where every title encountered will be made into a subdirectory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). This option only works if `split_on_titles` is enabled. 
From 107464e57b3581d96130eeea63f7d3390025125e Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 13:42:40 +0200 Subject: [PATCH 101/152] relocate test files --- .../if_mangler_test_files/if_mangler_1_input.md | 0 .../if_mangler_test_files/if_mangler_1_output.md | 0 .../if_mangler_test_files/if_mangler_2_input.md | 0 .../if_mangler_test_files/if_mangler_2_output.md | 0 .../if_mangler_test_files/if_mangler_3_input.md | 0 .../if_mangler_test_files/if_mangler_3_output.md | 0 .../if_mangler_test_files/if_mangler_4_input.md | 0 .../if_mangler_test_files/if_mangler_4_output.md | 0 .../if_mangler_test_files/if_mangler_5_input.md | 0 .../if_mangler_test_files/if_mangler_5_output.md | 0 .../if_mangler_test_files/if_mangler_6_input.md | 0 .../if_mangler_test_files/if_mangler_6_output.md | 0 .../if_mangler_test_files/if_mangler_7_input.md | 0 .../if_mangler_test_files/if_mangler_7_output.md | 0 .../if_mangler_test_files/if_mangler_input.md | 0 .../if_mangler_test_files/if_mangler_output.md | 0 .../tests/test_files/test_paragraph_split_1.md | 0 .../example_text_1.md => test_files/test_title_split_1.md} | 0 18 files changed, 0 insertions(+), 0 deletions(-) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_1_input.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_1_output.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_2_input.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_2_output.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_3_input.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_3_output.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files 
=> test_files}/if_mangler_test_files/if_mangler_4_input.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_4_output.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_5_input.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_5_output.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_6_input.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_6_output.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_7_input.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_7_output.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_input.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_output.md (100%) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md rename scripts/HPC_chatbot_preprocessor/tests/{example_files/example_text_1.md => test_files/test_title_split_1.md} (100%) diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md similarity index 100% rename from 
scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md similarity index 100% rename from 
scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md From dd64381efc3b7156c2905f69aec54572b2ca2c53 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 13:47:22 +0200 Subject: [PATCH 102/152] update arguments of if mangler --- scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py index 17053fe705c..4d0dd876103 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py @@ -18,10 +18,10 @@ def test_if_mangler(input_file, output_file): os.makedirs(os.path.join("if_mangled_files"), exist_ok=True) # make filepaths - input_file_path = os.path.join("tests", "example_files", 
"if_mangler_test_files", input_file) - expected_output_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", output_file) + input_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", input_file) + expected_output_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", output_file) actual_output_file_path = os.path.join("if_mangled_files", input_file) - mangle_ifs(input_file_path, input_file) + mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.'}) # check every line with open(expected_output_file_path, "r") as expected_read_file: From ef3fd584a21e2e417363a5f083bcf94261739ceb Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 13:54:19 +0200 Subject: [PATCH 103/152] relocate full test files --- .../test_paragraph_split_1.md | 0 .../test_title_split_1.md | 0 .../if_mangler_1_input.md | 4 -- .../if_mangler_1_output.md | 4 -- .../if_mangler_2_input.md | 7 --- .../if_mangler_2_output.md | 7 --- .../if_mangler_3_input.md | 6 -- .../if_mangler_3_output.md | 6 -- .../if_mangler_4_input.md | 4 -- .../if_mangler_4_output.md | 4 -- .../if_mangler_5_input.md | 11 ---- .../if_mangler_5_output.md | 11 ---- .../if_mangler_6_input.md | 8 --- .../if_mangler_6_output.md | 8 --- .../if_mangler_7_input.md | 9 --- .../if_mangler_7_output.md | 9 --- .../if_mangler_test_files/if_mangler_input.md | 55 ------------------- .../if_mangler_output.md | 55 ------------------- 18 files changed, 208 deletions(-) rename scripts/HPC_chatbot_preprocessor/tests/test_files/{ => full_test_paragraph_split}/test_paragraph_split_1.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/{ => full_test_title_split}/test_title_split_1.md (100%) delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md delete mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md deleted file mode 100644 index 6a74b3c0181..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md +++ /dev/null @@ -1,4 +0,0 @@ -test1: OS_IF -{% if OS == windows %} -test1 -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md deleted file mode 100644 index 2f9cdc38294..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md +++ /dev/null @@ -1,4 +0,0 @@ -test1: OS_IF -{-if-% if OS == windows %-if-} -test1 -{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md deleted file mode 100644 index 360a4a59ba3..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md +++ /dev/null @@ -1,7 +0,0 @@ -test2: OS_IF in NON_OS_IF -{% if site == Gent %} -test2 -{% if OS == windows %} -test2 -{% endif %} -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md deleted file mode 100644 index 798dcf6db24..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md +++ /dev/null @@ -1,7 +0,0 @@ -test2: OS_IF in NON_OS_IF -{% if site == Gent %} -test2 -{-if-% if OS == windows %-if-} -test2 -{-if-% endif %-if-} -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md deleted file mode 100644 index d93125a5971..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md +++ /dev/null @@ -1,6 +0,0 @@ -test3: OS_IF with else -{% if OS == linux %} -test3 -{% else %} -test3 -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md deleted file mode 100644 index 02141961338..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md +++ /dev/null @@ -1,6 +0,0 @@ -test3: OS_IF with else -{-if-% if OS == linux %-if-} -test3 -{-if-% else %-if-} -test3 -{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md deleted file mode 100644 index cc15fae1df1..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md +++ /dev/null @@ -1,4 +0,0 @@ -test4: OS_IF with wrong syntax -{ if OS == macos } -test4 -{ endif } \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md deleted file mode 100644 index cc15fae1df1..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md +++ /dev/null @@ -1,4 +0,0 @@ -test4: OS_IF with wrong syntax -{ if OS == macos } -test4 -{ endif } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md deleted file mode 100644 index bdb288474e2..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md +++ /dev/null @@ -1,11 +0,0 @@ -test5: OS_IF in OS_IF -{% if OS == windows %} -test5 -{% else %} -{% if OS == linux %} -test5 -{% else %} -test5 -{% endif %} -test5 -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md deleted file mode 100644 index 10443eb67a4..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md +++ /dev/null @@ -1,11 +0,0 @@ -test5: OS_IF in OS_IF -{-if-% if OS == windows %-if-} -test5 -{-if-% else %-if-} -{-if-% if OS == linux %-if-} -test5 -{-if-% else %-if-} -test5 -{-if-% endif %-if-} -test5 -{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md deleted file mode 100644 index 0731ee3588c..00000000000 --- 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md +++ /dev/null @@ -1,8 +0,0 @@ -test6: NON_OS_IF in OS_IF -{% if OS == macos %} -test6 -{% if site == Gent %} -test6 -{% endif %} -test6 -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md deleted file mode 100644 index cd37117cb00..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md +++ /dev/null @@ -1,8 +0,0 @@ -test6: NON_OS_IF in OS_IF -{-if-% if OS == macos %-if-} -test6 -{% if site == Gent %} -test6 -{% endif %} -test6 -{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md deleted file mode 100644 index 6a72a338527..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md +++ /dev/null @@ -1,9 +0,0 @@ -test7: weird spacing and dashes - {%if OS == windows %} - test7 -{%- else%} - test7 - {% if OS == linux%} -test7 - {%-endif %} -{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md deleted file mode 100644 index dfe342ebfb1..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md +++ /dev/null @@ -1,9 +0,0 @@ -test7: weird spacing and dashes - {-if-%if OS == windows %-if-} - test7 -{-if-%- else%-if-} - test7 - {-if-% if OS == linux%-if-} -test7 - {-if-%-endif %-if-} -{-if-%endif%-if-} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md deleted file mode 100644 index fb8c1f8b539..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md +++ /dev/null @@ -1,55 +0,0 @@ -test1: OS_IF -{% if OS == windows %} -test1 -{% endif %} - -test2: OS_IF in NON_OS_IF -{% if site == Gent %} -test2 -{% if OS == windows %} -test2 -{% endif %} -{% endif %} - -test3: OS_IF with else -{% if OS == linux %} -test3 -{% else %} -test3 -{% endif %} - -test4: OS_IF with wrong syntax -{ if OS == macos } -test4 -{ endif } - -test5: OS_IF in OS_IF -{% if OS == windows %} -test5 -{% else %} -{% if OS == linux %} -test5 -{% else %} -test5 -{% endif %} -test5 -{% endif %} - -test6: NON_OS_IF in OS_IF -{% if OS == macos %} -test6 -{% if site == Gent %} -test6 -{% endif %} -test6 -{% endif %} - -test7: weird spacing and dashes - {%if OS == windows %} - test7 -{%- else%} - test7 - {% if OS == linux%} -test7 - {%-endif %} -{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md deleted file mode 100644 index 796e94348fa..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md +++ /dev/null @@ -1,55 +0,0 @@ -test1: OS_IF -{-if-% if OS == windows %-if-} -test1 -{-if-% endif %-if-} - -test2: OS_IF in NON_OS_IF -{% if site == Gent %} -test2 -{-if-% if OS == windows %-if-} -test2 -{-if-% endif %-if-} -{% endif %} - -test3: OS_IF with else -{-if-% if OS == linux %-if-} -test3 -{-if-% else %-if-} -test3 -{-if-% endif %-if-} - -test4: OS_IF with wrong syntax -{ if OS == macos } -test4 -{ endif } - -test5: OS_IF in OS_IF -{-if-% if OS == windows %-if-} -test5 -{-if-% else 
%-if-} -{-if-% if OS == linux %-if-} -test5 -{-if-% else %-if-} -test5 -{-if-% endif %-if-} -test5 -{-if-% endif %-if-} - -test6: NON_OS_IF in OS_IF -{-if-% if OS == macos %-if-} -test6 -{% if site == Gent %} -test6 -{% endif %} -test6 -{-if-% endif %-if-} - -test7: weird spacing and dashes - {-if-%if OS == windows %-if-} - test7 -{-if-%- else%-if-} - test7 - {-if-% if OS == linux%-if-} -test7 - {-if-%-endif %-if-} -{-if-%endif%-if-} \ No newline at end of file From 4d7db8f889decbcf157ef08c55912c3e269ef382 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 13:59:49 +0200 Subject: [PATCH 104/152] Revert "update arguments of if mangler" This reverts commit dd64381efc3b7156c2905f69aec54572b2ca2c53. --- scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py index 4d0dd876103..17053fe705c 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py @@ -18,10 +18,10 @@ def test_if_mangler(input_file, output_file): os.makedirs(os.path.join("if_mangled_files"), exist_ok=True) # make filepaths - input_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", input_file) - expected_output_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", output_file) + input_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", input_file) + expected_output_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", output_file) actual_output_file_path = os.path.join("if_mangled_files", input_file) - mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.'}) + mangle_ifs(input_file_path, input_file) # check every line with open(expected_output_file_path, "r") as expected_read_file: From df9bac5031138324895fa70b6d16d82c8fa2e164 Mon 
Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 14:02:12 +0200 Subject: [PATCH 105/152] Revert "relocate full test files" This reverts commit ef3fd584a21e2e417363a5f083bcf94261739ceb. --- .../if_mangler_1_input.md | 4 ++ .../if_mangler_1_output.md | 4 ++ .../if_mangler_2_input.md | 7 +++ .../if_mangler_2_output.md | 7 +++ .../if_mangler_3_input.md | 6 ++ .../if_mangler_3_output.md | 6 ++ .../if_mangler_4_input.md | 4 ++ .../if_mangler_4_output.md | 4 ++ .../if_mangler_5_input.md | 11 ++++ .../if_mangler_5_output.md | 11 ++++ .../if_mangler_6_input.md | 8 +++ .../if_mangler_6_output.md | 8 +++ .../if_mangler_7_input.md | 9 +++ .../if_mangler_7_output.md | 9 +++ .../if_mangler_test_files/if_mangler_input.md | 55 +++++++++++++++++++ .../if_mangler_output.md | 55 +++++++++++++++++++ .../test_paragraph_split_1.md | 0 .../test_title_split_1.md | 0 18 files changed, 208 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md create mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md rename scripts/HPC_chatbot_preprocessor/tests/test_files/{full_test_paragraph_split => }/test_paragraph_split_1.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/{full_test_title_split => }/test_title_split_1.md (100%) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md new file mode 100644 index 00000000000..6a74b3c0181 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md @@ -0,0 +1,4 @@ +test1: OS_IF +{% if OS == windows %} +test1 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md new file mode 100644 index 00000000000..2f9cdc38294 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md @@ -0,0 +1,4 @@ +test1: OS_IF +{-if-% if OS == windows %-if-} +test1 +{-if-% endif %-if-} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md new file mode 100644 index 00000000000..360a4a59ba3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md @@ -0,0 +1,7 @@ +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{% if OS == windows %} +test2 +{% endif %} +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md new file mode 100644 index 00000000000..798dcf6db24 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md @@ -0,0 +1,7 @@ +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{-if-% if OS == windows %-if-} +test2 +{-if-% endif %-if-} +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md new file mode 100644 index 00000000000..d93125a5971 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md @@ -0,0 +1,6 @@ +test3: OS_IF with else +{% if OS == linux %} +test3 +{% else %} +test3 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md new file mode 100644 index 00000000000..02141961338 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md @@ -0,0 +1,6 @@ +test3: OS_IF with else +{-if-% if OS == linux 
%-if-} +test3 +{-if-% else %-if-} +test3 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md new file mode 100644 index 00000000000..cc15fae1df1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md @@ -0,0 +1,4 @@ +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md new file mode 100644 index 00000000000..cc15fae1df1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md @@ -0,0 +1,4 @@ +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md new file mode 100644 index 00000000000..bdb288474e2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md @@ -0,0 +1,11 @@ +test5: OS_IF in OS_IF +{% if OS == windows %} +test5 +{% else %} +{% if OS == linux %} +test5 +{% else %} +test5 +{% endif %} +test5 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md new file mode 100644 index 00000000000..10443eb67a4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md @@ -0,0 
+1,11 @@ +test5: OS_IF in OS_IF +{-if-% if OS == windows %-if-} +test5 +{-if-% else %-if-} +{-if-% if OS == linux %-if-} +test5 +{-if-% else %-if-} +test5 +{-if-% endif %-if-} +test5 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md new file mode 100644 index 00000000000..0731ee3588c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md @@ -0,0 +1,8 @@ +test6: NON_OS_IF in OS_IF +{% if OS == macos %} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md new file mode 100644 index 00000000000..cd37117cb00 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md @@ -0,0 +1,8 @@ +test6: NON_OS_IF in OS_IF +{-if-% if OS == macos %-if-} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md new file mode 100644 index 00000000000..6a72a338527 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md @@ -0,0 +1,9 @@ +test7: weird spacing and dashes + {%if OS == windows %} + test7 +{%- else%} + test7 + {% if OS == linux%} +test7 + {%-endif %} +{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md new file mode 100644 index 00000000000..dfe342ebfb1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md @@ -0,0 +1,9 @@ +test7: weird spacing and dashes + {-if-%if OS == windows %-if-} + test7 +{-if-%- else%-if-} + test7 + {-if-% if OS == linux%-if-} +test7 + {-if-%-endif %-if-} +{-if-%endif%-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md new file mode 100644 index 00000000000..fb8c1f8b539 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md @@ -0,0 +1,55 @@ +test1: OS_IF +{% if OS == windows %} +test1 +{% endif %} + +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{% if OS == windows %} +test2 +{% endif %} +{% endif %} + +test3: OS_IF with else +{% if OS == linux %} +test3 +{% else %} +test3 +{% endif %} + +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } + +test5: OS_IF in OS_IF +{% if OS == windows %} +test5 +{% else %} +{% if OS == linux %} +test5 +{% else %} +test5 +{% endif %} +test5 +{% endif %} + +test6: NON_OS_IF in OS_IF +{% if OS == macos %} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{% endif %} + +test7: weird spacing and dashes + {%if OS == windows %} + test7 +{%- else%} + test7 + {% if OS == linux%} +test7 + {%-endif %} +{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md new file mode 100644 index 00000000000..796e94348fa --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md @@ -0,0 +1,55 @@ +test1: OS_IF +{-if-% if OS == windows %-if-} +test1 +{-if-% endif %-if-} + +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{-if-% if OS == windows %-if-} +test2 +{-if-% endif %-if-} +{% endif %} + +test3: OS_IF with else +{-if-% if OS == linux %-if-} +test3 +{-if-% else %-if-} +test3 +{-if-% endif %-if-} + +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } + +test5: OS_IF in OS_IF +{-if-% if OS == windows %-if-} +test5 +{-if-% else %-if-} +{-if-% if OS == linux %-if-} +test5 +{-if-% else %-if-} +test5 +{-if-% endif %-if-} +test5 +{-if-% endif %-if-} + +test6: NON_OS_IF in OS_IF +{-if-% if OS == macos %-if-} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{-if-% endif %-if-} + +test7: weird spacing and dashes + {-if-%if OS == windows %-if-} + test7 +{-if-%- else%-if-} + test7 + {-if-% if OS == linux%-if-} +test7 + {-if-%-endif %-if-} +{-if-%endif%-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md From 631d9e9c26945359eb25ce08a37cd424061c2407 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 14:08:22 +0200 Subject: [PATCH 106/152] 
update test to adapt to new arguments in if mangler --- scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py index 17053fe705c..4d0dd876103 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py @@ -18,10 +18,10 @@ def test_if_mangler(input_file, output_file): os.makedirs(os.path.join("if_mangled_files"), exist_ok=True) # make filepaths - input_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", input_file) - expected_output_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", output_file) + input_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", input_file) + expected_output_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", output_file) actual_output_file_path = os.path.join("if_mangled_files", input_file) - mangle_ifs(input_file_path, input_file) + mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.'}) # check every line with open(expected_output_file_path, "r") as expected_read_file: From c6e600dcbdf9885b41cd8cbd07917a92d2b423a6 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 14:15:25 +0200 Subject: [PATCH 107/152] relocated full test files --- .../{ => full_test_paragraph_split}/test_paragraph_split_1.md | 0 .../test_files/{ => full_test_title_split}/test_title_split_1.md | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename scripts/HPC_chatbot_preprocessor/tests/test_files/{ => full_test_paragraph_split}/test_paragraph_split_1.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/{ => full_test_title_split}/test_title_split_1.md (100%) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md From d1c6194e8aa75301f7e3b1a2396eb13538de063d Mon Sep 17 00:00:00 2001 From: EwDa291 <100782488+EwDa291@users.noreply.github.com> Date: Mon, 26 Aug 2024 14:17:42 +0200 Subject: [PATCH 108/152] Rename test_paragraph_split_1.md to test_paragraph_split_1_input.md --- .../full_test_paragraph_split/test_paragraph_split_1.md | 0 .../full_test_paragraph_split/test_paragraph_split_1_input.md | 1 + 2 files changed, 1 insertion(+) delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md new file mode 100644 index 00000000000..d3f5a12faa9 --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md @@ -0,0 +1 @@ + From 695ffd635a61ff44514232a1b37f55198100f0bd Mon Sep 17 00:00:00 2001 From: EwDa291 <100782488+EwDa291@users.noreply.github.com> Date: Mon, 26 Aug 2024 14:18:00 +0200 Subject: [PATCH 109/152] Rename test_title_split_1.md to test_title_split_1_input.md --- .../{test_title_split_1.md => test_title_split_1_input.md} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/{test_title_split_1.md => test_title_split_1_input.md} (98%) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md similarity index 98% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md index 9b810c3f41a..5065852e2a1 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md @@ -28,4 +28,4 @@ blablabla generic with a [link](generic.md) ## Subtitle 5 generic -blablabla \ No newline at end of file +blablabla From af4832b5a8cd50bc790353a232fcca5e51e35e90 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 15:08:37 +0200 Subject: [PATCH 110/152] smal fix --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 26cf15b79a2..db2c5e84257 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py 
@@ -825,7 +825,8 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or # add first subtitle in front of section again if len(jinja_text) != 0: - jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + jinja_text + if options[SPLIT_ON_TITLES]: + jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + jinja_text # re-adjust text to correct overcorrections jinja_text = re.sub('"' + OS + '"', OS, jinja_text) @@ -951,7 +952,7 @@ def main(options): # create directories for the source markdown file for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: - os.makedirs(os.path.join(options[DESTINATION_DIRECTORY], directory), exist_ok=True) + os.makedirs(directory, exist_ok=True) # process the jinja macros jinja_parser(filename, copy_file, options) From 8805c8c01b6efe814fae4bba5f4b05f9e9d8beb2 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 15:11:52 +0200 Subject: [PATCH 111/152] test text for paragraph split --- .../test_paragraph_split_1_input.md | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md index d3f5a12faa9..44ac82c795d 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md @@ -1 +1,43 @@ +# Main title +This is the first paragraph of text. 
It is non-os-specific, however it does contain [a link](generic.md). +It also contains some `other` *Markdown* _syntax_ and an +```shell +example code block. +``` +This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum +character limit for a section). + +## OS specific sections + +This is the second section, it is the start of some {% if OS == windows %} text specific to windows. +In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer +to make sure we get a long section that is over the minimum required length for the next newline character to be +classified as the end of this section. I am doing this because for the next sections I want to test whether they will be +grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, +let's add [a link](windows.md) in this section as well. + +### Windows specific section + +Like this. + +And this. + +And also this. + +These section should all be grouped together under the windows specific section of the output. The addition of this long +section at the end should make sure the combination of sections comes to an end here. +{% else %} +text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will +still add [a link](linuxmacos.md). + +### Non Windows section + +Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise +section that ends right here. +{% endif %} + +## Conclusion + +Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I +might add to this if needed. 
From a265ffd87121d3d195670cc76f8d94b4b8bcc009 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 16:52:20 +0200 Subject: [PATCH 112/152] start of a fix for double title problem, not done yet --- .../chatbot_parser.py | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index db2c5e84257..c0b91319912 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -401,11 +401,13 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # metadata title metadata_title = main_title + # TODO: define metadata data if split occurs on paragraphs and last_title and title_level are known (placeholder in place right now) + if current_paragraph_number != -1: + last_title_level = 5 + last_dir = "PLACEHOLDER" + # list to keep track of most recent directories on each title level - if LINUX_TUTORIAL not in file: - curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)] - else: - curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(options[MAX_TITLE_DEPTH] + 1)] + curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)] with open(file, 'r') as readfile: @@ -885,7 +887,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or pass -def main(options): +def main(options, verbose=True): """ main function @@ -896,10 +898,11 @@ def main(options): MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text, INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext, DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not} + :param verbose: boolean indicating whether print statements from the main function should be print, only used when for 
testing :return: """ - if options[DEEP_DIRECTORIES]: + if options[DEEP_DIRECTORIES] and verbose: print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason @@ -915,7 +918,7 @@ def main(options): ################### define loop-invariant variables ################### - # constant that keeps track of the source directories + # constant that keeps track of the source directory source_directory = options[SOURCE_DIRECTORY] # list of all the filenames @@ -952,7 +955,7 @@ def main(options): # create directories for the source markdown file for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: - os.makedirs(directory, exist_ok=True) + os.makedirs(os.path.join(options[DESTINATION_DIRECTORY], directory), exist_ok=True) # process the jinja macros jinja_parser(filename, copy_file, options) @@ -978,7 +981,8 @@ def main(options): if os.path.exists(TEMP_JINJA_FILE): os.remove(TEMP_JINJA_FILE) - print("Parsing finished successfully") + if verbose: + print("Parsing finished successfully") ################### run the script ################### From 6c2a61c25215cf3d5c942c6c2de7804baf725584 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 09:51:47 +0200 Subject: [PATCH 113/152] Fix for double title bug when splitting on paragraph --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git 
a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index c0b91319912..72ebbcee3ab 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -825,10 +825,11 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or template = Template(text) jinja_text = template.render(OS=OS) - # add first subtitle in front of section again if len(jinja_text) != 0: - if options[SPLIT_ON_TITLES]: - jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + jinja_text + + # add first subtitle in front of section again + if options[SPLIT_ON_TITLES] or metadata[SUBTITLE].replace("-", " ") not in jinja_text[:len(metadata[SUBTITLE]) + 1]: + jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE].replace("-", " ") + "\n" + jinja_text # re-adjust text to correct overcorrections jinja_text = re.sub('"' + OS + '"', OS, jinja_text) From ed088794e1b6ceb5b805c87a82bcd31df6931299 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 10:05:56 +0200 Subject: [PATCH 114/152] Fix bug for empty linklist in metadata --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 72ebbcee3ab..dfa2972b9fc 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -741,6 +741,10 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe writefile.write(text) # write metadata + # check if links in metadata is not empty + if LINKS in metadata.keys() and len(metadata[LINKS].keys()) == 0: + del metadata[LINKS] + # add previous subtitle if title_order_number != 0: metadata[PREVIOUS_SUBTITLE] = title_order[title_order_number - 1] @@ -830,6 +834,8 @@ def 
split_and_write_os_specific_section(text, metadata, subtitle_order, title_or # add first subtitle in front of section again if options[SPLIT_ON_TITLES] or metadata[SUBTITLE].replace("-", " ") not in jinja_text[:len(metadata[SUBTITLE]) + 1]: jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE].replace("-", " ") + "\n" + jinja_text + else: + jinja_text = "#" * metadata[TITLE_DEPTH] + " " + jinja_text # re-adjust text to correct overcorrections jinja_text = re.sub('"' + OS + '"', OS, jinja_text) From 176af130ab9837f3d28511bcf113aeb38bed1c9b Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 10:33:17 +0200 Subject: [PATCH 115/152] fix bug where too many directories were sometimes created --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index dfa2972b9fc..b0bacbbca17 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -962,7 +962,7 @@ def main(options, verbose=True): # create directories for the source markdown file for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: - os.makedirs(os.path.join(options[DESTINATION_DIRECTORY], directory), exist_ok=True) + os.makedirs(directory, exist_ok=True) # process the jinja macros jinja_parser(filename, copy_file, options) From d4ceac8962b2bf61def602b5dad3ecfc7d12bc1e Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 10:41:49 +0200 Subject: [PATCH 116/152] test of full script, test files not ready to be pushed yet --- 
.../tests/test_full_script.py | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_full_script.py diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py new file mode 100644 index 00000000000..61a6f3f1bdf --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py @@ -0,0 +1,66 @@ +import pytest +import os +import shutil +from chatbot_parser import main + + +@pytest.mark.parametrize("input_directory,actual_output_directory,expected_output_directory, options", [ + ("tests/test_files/ftps", "tests/test_files/ftps/actual", + "tests/test_files/ftps/output", + {"SOURCE_DIRECTORY": "tests/test_files/ftps", + "DESTINATION_DIRECTORY": "tests/test_files/ftps/actual", + "SPLIT_ON_TITLES": False, + "SPLIT_ON_PARAGRAPHS": True, + "MIN_PARAGRAPH_LENGTH": 160, + "MAX_TITLE_DEPTH": 4, + "INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": False} + ), + ("tests/test_files/ftts", "tests/test_files/ftts/actual", + "tests/test_files/ftts/output", + {"SOURCE_DIRECTORY": "tests/test_files/ftts", + "DESTINATION_DIRECTORY": "tests/test_files/ftts/actual", + "SPLIT_ON_TITLES": True, + "SPLIT_ON_PARAGRAPHS": False, + "MIN_PARAGRAPH_LENGTH": 160, + "MAX_TITLE_DEPTH": 4, + "INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": True} + ) +]) +def test_full_script_generated_directories(input_directory, actual_output_directory, expected_output_directory, options): + # run the script + main(options, verbose=False) + + # Compare directories and files + for dirpath, dirnames, filenames in os.walk(expected_output_directory): + relative_path = os.path.relpath(dirpath, expected_output_directory) + actual_dir = os.path.join(actual_output_directory, relative_path) + + # Check if the directory exists + assert os.path.isdir(actual_dir), f"Directory '{actual_dir}' is missing." 
+ + # Check for files + for filename in filenames: + ref_file = os.path.join(dirpath, filename) + gen_file = os.path.join(actual_dir, filename) + + # Check if the file exists + assert os.path.isfile(gen_file), f"File '{gen_file}' is missing." + + # Check file content + with open(ref_file, 'r') as ref_f, open(gen_file, 'r') as gen_f: + ref_content = ref_f.read().strip() + gen_content = gen_f.read().strip() + assert ref_content == gen_content, f"Content of file '{gen_file}' does not match." + + # check that not too many directories have been generated + for dirpath, dirnames, filenames in os.walk(actual_output_directory): + relative_path = os.path.relpath(dirpath, actual_output_directory) + expected_dir = os.path.join(expected_output_directory, relative_path) + + # Check if the directory exists + assert os.path.isdir(expected_dir), f"Directory '{relative_path}' was made, but shouldn't have been." + + # remove directory + shutil.rmtree(actual_output_directory, ignore_errors=True) From 815a863fc83f37bfa49976ca14ce23e63e3fafa4 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 12:18:12 +0200 Subject: [PATCH 117/152] updated requirements.txt --- scripts/HPC_chatbot_preprocessor/requirements.txt | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt index 907f08fda77..4d27d462460 100644 --- a/scripts/HPC_chatbot_preprocessor/requirements.txt +++ b/scripts/HPC_chatbot_preprocessor/requirements.txt @@ -1,7 +1,2 @@ -os -re -shutil -pypandoc -yaml -jinja2 -pathlib \ No newline at end of file +PyYAML==6.0.2 +Jinja2==3.1.4 \ No newline at end of file From d15469f420a86edeabda1472497c38206b53351d Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 12:19:05 +0200 Subject: [PATCH 118/152] updated docstring in main function --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff 
--git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index b0bacbbca17..698278da90d 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -899,7 +899,9 @@ def main(options, verbose=True): main function :param options: dictionary containing the options specified by the user to run the script: - {SPLIT_ON_TITLES: boolean indicating whether to split on titles, + {SOURCE_DIRECTORY: The source directory where the original files are located, + DESTINATION_DIRECTORY: The destination directory where the processed files should be written to, + SPLIT_ON_TITLES: boolean indicating whether to split on titles, SPLIT_ON_PARAGRAPHS: boolean indicating whether to split on paragraphs (should always be the opposite of SPLIT_ON_TITLES), MIN_PARAGRAPH_LENGTH: integer representing the minimum length of a paragraph, MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text, From daa6b36e07854f1b41b5907339bf283218d93a2c Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 12:32:09 +0200 Subject: [PATCH 119/152] add support for comments for the bot to be included in the source files --- scripts/HPC_chatbot_preprocessor/README.md | 8 +++++++- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 11 +++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index bc2922aaf5a..2cb30bdc985 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -149,7 +149,7 @@ This will also result in the parser "forgetting" it opened an os-specific if-sta The input shouldn't contain any html syntax. While some failsafes are in place, the script isn't made with the use case of handling html syntax in mind. 
-### Markdown comments +### Comments Any comments within the markdown files (for example TODO's) should follow the following syntax: @@ -158,6 +158,12 @@ Any comments within the markdown files (for example TODO's) should follow the fo ``` and should be limited to one line. +Comments can be written in such a way that the script will keep them as input for the bot. To do that, the marker `INPUT_FOR_BOT` should be put in front of the content of the comment as such. + +``` + +``` + ### Long filepaths Due to the nature of this script, it can generate large directories with very long names if `deep_directories` is enabled. Depending on the operating system, this can cause problems with filepaths being to long, resulting in files not being able to open. A possible fix for this is to make sure the filepath to where the script is located is not too long. Another solution is lowering the `max_title_depth` or disabling `deep_directories`. diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 698278da90d..338cdef32f5 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -83,7 +83,7 @@ ELSE = "else" ENDIF = "endif" -# link indicators +# link indicator LINK_MARKER = r'Ā§linkĀ§linkĀ§' # HTML tags @@ -101,6 +101,9 @@ _PARAGRAPH_ = "_paragraph_" METADATA_EXTENSION = "_metadata" +# Marker for comments for the bot +INPUT_FOR_BOT = "INPUT_FOR_BOT" + ################### define functions ################### @@ -184,7 +187,11 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): elif any(re.match(pattern, content) for pattern in html_tags_style): curr_line = re.sub(r'<.*?>', "", curr_line) - # drop markdown comments + # keep comments for bot + elif re.fullmatch(r'!--' + INPUT_FOR_BOT + r'.*?--', content): + curr_line = re.sub(r'', lambda m: m.group(1), curr_line) + + # drop comments elif re.fullmatch(r'!--.*?--', content): 
curr_line = re.sub(r'<.*?>', "", curr_line) From 4c19f442e2e4f6af1f2448e26cf0b1b29e4522ac Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 13:30:01 +0200 Subject: [PATCH 120/152] changed the default for min paragraph length --- scripts/HPC_chatbot_preprocessor/README.md | 2 +- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 2cb30bdc985..b3bce665973 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -36,7 +36,7 @@ Including this option will split the source files based on the titles and subtit #### `pl`/`min_paragraph_length` -This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 160 characters. This options only works if `split_on_titles` is not enabled. +This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 683 characters. This options only works if `split_on_titles` is not enabled. 
#### `td`/`max_title_depth` diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 338cdef32f5..a041160c855 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -1009,7 +1009,7 @@ def main(options, verbose=True): parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located") parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to") parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.") - parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is disabled (default: 160)") + parser.add_argument("-pl", "--min_paragraph_length", type=int, default=683, help="Minimum length in characters of a paragraph, only works if split on titles is disabled (default: 683)") parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)") parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. 
Only works if split on titles is enabled") From 9a6ff5814422fc2ea0d4a128407302572d964105 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 13:31:40 +0200 Subject: [PATCH 121/152] added test files for full script test --- .../generic/tps1/tps1_paragraph_1.txt | 6 ++ .../tps1/tps1_paragraph_1_metadata.json | 14 +++ .../generic/tps1/tps1_paragraph_3.txt | 3 + .../tps1/tps1_paragraph_3_metadata.json | 11 +++ .../linux/tps1/tps1_linux_paragraph_2.1.txt | 4 + .../tps1_linux_paragraph_2.1_metadata.json | 14 +++ .../linux/tps1/tps1_linux_paragraph_2.2.txt | 3 + .../tps1_linux_paragraph_2.2_metadata.json | 11 +++ .../macos/tps1/tps1_macos_paragraph_2.1.txt | 4 + .../tps1_macos_paragraph_2.1_metadata.json | 14 +++ .../macos/tps1/tps1_macos_paragraph_2.2.txt | 3 + .../tps1_macos_paragraph_2.2_metadata.json | 11 +++ .../tps1/tps1_windows_paragraph_2.1.txt | 7 ++ .../tps1_windows_paragraph_2.1_metadata.json | 14 +++ .../tps1/tps1_windows_paragraph_2.2.txt | 6 ++ .../tps1_windows_paragraph_2.2_metadata.json | 11 +++ .../tps1.md} | 86 +++++++++---------- .../tts1/Main-title/Subtitle-1/Subtitle-1.txt | 2 + .../Subtitle-1/Subtitle-1_metadata.json | 11 +++ .../Main-title/Subtitle-5-g/Subtitle-5-g.txt | 1 + .../Subtitle-5-g/Subtitle-5-g_metadata.json | 11 +++ .../Main-title/Subtitle-2-g/Subtitle-2-g.txt | 4 + .../Subtitle-2-g/Subtitle-2-g_metadata.json | 14 +++ .../Subtitle-4-l&m/Subtitle-4-l&m.txt | 3 + .../Subtitle-4-l&m_metadata.json | 14 +++ .../Main-title/Subtitle-2-g/Subtitle-2-g.txt | 4 + .../Subtitle-2-g/Subtitle-2-g_metadata.json | 14 +++ .../Subtitle-4-l&m/Subtitle-4-l&m.txt | 3 + .../Subtitle-4-l&m_metadata.json | 14 +++ .../Main-title/Subtitle-2-g/Subtitle-2-g.txt | 4 + .../Subtitle-2-g/Subtitle-2-g_metadata.json | 14 +++ .../Subtitle-3-w/Subtitle-3-w.txt | 3 + .../Subtitle-3-w/Subtitle-3-w_metadata.json | 14 +++ .../tts1.md} | 8 +- 34 files changed, 313 insertions(+), 47 deletions(-) create mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json rename scripts/HPC_chatbot_preprocessor/tests/test_files/{full_test_paragraph_split/test_paragraph_split_1_input.md => ftps/tps1.md} (97%) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json create mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json rename scripts/HPC_chatbot_preprocessor/tests/test_files/{full_test_title_split/test_title_split_1_input.md => ftts/tts1.md} (76%) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt new file mode 100644 index 00000000000..94270ff37e3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt @@ -0,0 +1,6 @@ +Main title +This is the first paragraph of text. It is non-os-specific, however it does contain a link. 
+It also contains some other Markdown syntax and an +example code block. +This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum +character limit for a section). diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json new file mode 100644 index 00000000000..19e44fad91d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tps1", + "subtitle": "Main-title", + "title_depth": 1, + "directory": "tps1", + "links": { + "0": "https://docs.hpc.ugent.be/generic" + }, + "parent_title": "", + "previous_title": null, + "next_title": "tps1_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/tps1/#main-title" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt new file mode 100644 index 00000000000..58eedc06aa0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt @@ -0,0 +1,3 @@ +Conclusion +Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I +might add to this if needed. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json new file mode 100644 index 00000000000..b4c98ff6465 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "tps1", + "subtitle": "Conclusion", + "title_depth": 2, + "directory": "tps1", + "parent_title": "", + "previous_title": "tps1_paragraph_2", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/tps1/#conclusion" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt new file mode 100644 index 00000000000..d0ee9ce8256 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt @@ -0,0 +1,4 @@ +OS specific sections +This is the second section, it is the start of some +text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will +still add a link. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json new file mode 100644 index 00000000000..bac81ed87e3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tps1", + "subtitle": "OS-specific-sections", + "title_depth": 2, + "directory": "tps1", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/linuxmacos" + }, + "previous_title": "tps1_paragraph_1", + "next_title": "tps1_linux_paragraph_2.2", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#os-specific-sections" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt new file mode 100644 index 00000000000..1a3867e69fa --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt @@ -0,0 +1,3 @@ +Non Windows section +Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise +section that ends right here. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json new file mode 100644 index 00000000000..522265436ab --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "tps1", + "subtitle": "Non-Windows-section", + "title_depth": 3, + "directory": "tps1", + "parent_title": "OS-specific-sections", + "previous_title": "tps1_linux_paragraph_2.1", + "next_title": "tps1_paragraph_3", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#non-windows-section" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt new file mode 100644 index 00000000000..e0642d6ac96 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt @@ -0,0 +1,4 @@ +OS specific sections +This is the second section, it is the start of some +text specific to OSes that aren't "windows". I feel like there is no need to make this section very long, however I will +still add a link. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json new file mode 100644 index 00000000000..5d9ec163f99 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tps1", + "subtitle": "OS-specific-sections", + "title_depth": 2, + "directory": "tps1", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/linuxmacos" + }, + "previous_title": "tps1_paragraph_1", + "next_title": "tps1_macos_paragraph_2.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#os-specific-sections" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt new file mode 100644 index 00000000000..1a3867e69fa --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt @@ -0,0 +1,3 @@ +Non Windows section +Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise +section that ends right here. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json new file mode 100644 index 00000000000..7b06f06efdd --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "tps1", + "subtitle": "Non-Windows-section", + "title_depth": 3, + "directory": "tps1", + "parent_title": "OS-specific-sections", + "previous_title": "tps1_macos_paragraph_2.1", + "next_title": "tps1_paragraph_3", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#non-windows-section" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt new file mode 100644 index 00000000000..9a9cbe1f3d2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt @@ -0,0 +1,7 @@ +OS specific sections +This is the second section, it is the start of some text specific to windows. +In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer +to make sure we get a long section that is over the minimum required length for the next newline character to be +classified as the end of this section. I am doing this because for the next sections I want to test whether they will be +grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, +let's add a link in this section as well. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json new file mode 100644 index 00000000000..e8e50aa6c32 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tps1", + "subtitle": "OS-specific-sections", + "title_depth": 2, + "directory": "tps1", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/windows" + }, + "previous_title": "tps1_paragraph_1", + "next_title": "tps1_windows_paragraph_2.2", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#os-specific-sections" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt new file mode 100644 index 00000000000..6b57235f68f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt @@ -0,0 +1,6 @@ +Windows specific section +Like this. +And this. +And also this. +These section should all be grouped together under the windows specific section of the output. The addition of this long +section at the end should make sure the combination of sections comes to an end here. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json new file mode 100644 index 00000000000..84ea6ad53f9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "tps1", + "subtitle": "Windows-specific-section", + "title_depth": 3, + "directory": "tps1", + "parent_title": "OS-specific-sections", + "previous_title": "tps1_windows_paragraph_2.1", + "next_title": "tps1_paragraph_3", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#windows-specific-section" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md similarity index 97% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md index 44ac82c795d..d9b10d0c524 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md @@ -1,43 +1,43 @@ -# Main title - -This is the first paragraph of text. It is non-os-specific, however it does contain [a link](generic.md). -It also contains some `other` *Markdown* _syntax_ and an -```shell -example code block. -``` -This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum -character limit for a section). 
- -## OS specific sections - -This is the second section, it is the start of some {% if OS == windows %} text specific to windows. -In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer -to make sure we get a long section that is over the minimum required length for the next newline character to be -classified as the end of this section. I am doing this because for the next sections I want to test whether they will be -grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, -let's add [a link](windows.md) in this section as well. - -### Windows specific section - -Like this. - -And this. - -And also this. - -These section should all be grouped together under the windows specific section of the output. The addition of this long -section at the end should make sure the combination of sections comes to an end here. -{% else %} -text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will -still add [a link](linuxmacos.md). - -### Non Windows section - -Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise -section that ends right here. -{% endif %} - -## Conclusion - -Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I -might add to this if needed. +# Main title + +This is the first paragraph of text. It is non-os-specific, however it does contain [a link](generic.md). +It also contains some `other` *Markdown* _syntax_ and an +```shell +example code block. +``` +This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum +character limit for a section). + +## OS specific sections + +This is the second section, it is the start of some {% if OS == windows %} text specific to windows. 
+In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer +to make sure we get a long section that is over the minimum required length for the next newline character to be +classified as the end of this section. I am doing this because for the next sections I want to test whether they will be +grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, +let's add [a link](windows.md) in this section as well. + +### Windows specific section + +Like this. + +And this. + +And also this. + +These section should all be grouped together under the windows specific section of the output. The addition of this long +section at the end should make sure the combination of sections comes to an end here. +{% else %} +text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will +still add [a link](linuxmacos.md). + +### Non Windows section + +Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise +section that ends right here. +{% endif %} + +## Conclusion + +Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I +might add to this if needed. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt new file mode 100644 index 00000000000..f62a4f31fee --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt @@ -0,0 +1,2 @@ +blablabla +blablablabla diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json new file mode 100644 index 00000000000..9fdbce652bf --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-1", + "title_depth": 2, + "directory": "tts1\\Main-title\\Subtitle-1", + "parent_title": "Main-title", + "previous_title": "Main-title", + "next_title": "Subtitle-2-g", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/tts1/#subtitle-1" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt new file mode 100644 index 00000000000..bdf68551202 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt @@ -0,0 +1 @@ +blablabla \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json new file mode 100644 index 00000000000..b48bcaaa08c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-5-g", + "title_depth": 2, + "directory": "tts1\\Main-title\\Subtitle-5-g", + "parent_title": "Main-title", + "previous_title": "Subtitle-2-g", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/tts1/#subtitle-5-g" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt new file mode 100644 index 00000000000..48125d91679 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt @@ -0,0 +1,4 @@ +blablabla generic +blablabla generic +blablabla Linux macOS +blablablabla Linux macOS with a link diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json new file mode 100644 index 00000000000..a2b68c8865e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -0,0 +1,14 @@ +{ + 
"main_title": "tts1", + "subtitle": "Subtitle-2-g", + "title_depth": 2, + "directory": "tts1\\Main-title\\Subtitle-2-g", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/linuxmacos" + }, + "previous_title": "Subtitle-1", + "next_title": "Subtitle-4-l&m", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/tts1/#subtitle-2-g" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt new file mode 100644 index 00000000000..b221f26074b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt @@ -0,0 +1,3 @@ +blablabla Linux macOS +blablablabla Linux macOS +blablabla generic with a link \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json new file mode 100644 index 00000000000..537541e2cb0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-4-l&m", + "title_depth": 3, + "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m", + "parent_title": "Subtitle-2-g", + "links": { + "0": "https://docs.hpc.ugent.be/generic" + }, + "previous_title": "Subtitle-2-g", + "next_title": "Subtitle-5-g", + "OS": "linux", 
+ "reference_link": "https://docs.hpc.ugent.be/Linux/tts1/#subtitle-4-lm" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt new file mode 100644 index 00000000000..48125d91679 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt @@ -0,0 +1,4 @@ +blablabla generic +blablabla generic +blablabla Linux macOS +blablablabla Linux macOS with a link diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json new file mode 100644 index 00000000000..6846da26b72 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-2-g", + "title_depth": 2, + "directory": "tts1\\Main-title\\Subtitle-2-g", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/linuxmacos" + }, + "previous_title": "Subtitle-1", + "next_title": "Subtitle-4-l&m", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/tts1/#subtitle-2-g" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt 
new file mode 100644 index 00000000000..b221f26074b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt @@ -0,0 +1,3 @@ +blablabla Linux macOS +blablablabla Linux macOS +blablabla generic with a link \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json new file mode 100644 index 00000000000..4e167b116d2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-4-l&m", + "title_depth": 3, + "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m", + "parent_title": "Subtitle-2-g", + "links": { + "0": "https://docs.hpc.ugent.be/generic" + }, + "previous_title": "Subtitle-2-g", + "next_title": "Subtitle-5-g", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/tts1/#subtitle-4-lm" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt new file mode 100644 index 00000000000..f9f20592832 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt @@ -0,0 +1,4 @@ +blablabla generic +blablabla generic +blablabla windows +blablabla windows with a link diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json new file mode 100644 index 00000000000..c4620a94080 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-2-g", + "title_depth": 2, + "directory": "tts1\\Main-title\\Subtitle-2-g", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/windows" + }, + "previous_title": "Subtitle-1", + "next_title": "Subtitle-3-w", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/tts1/#subtitle-2-g" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt new file mode 100644 index 00000000000..0b587cef85a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt @@ -0,0 +1,3 @@ +blablabla windows +blablablabla windows +blablabla generic with a link \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json new file mode 100644 index 00000000000..aa4b6317ce6 
--- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-3-w", + "title_depth": 3, + "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-3-w", + "parent_title": "Subtitle-2-g", + "links": { + "0": "https://docs.hpc.ugent.be/generic" + }, + "previous_title": "Subtitle-2-g", + "next_title": "Subtitle-5-g", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/tts1/#subtitle-3-w" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md similarity index 76% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md index 5065852e2a1..2f3ad7f9c08 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md @@ -5,27 +5,27 @@ blablabla blablablabla -### Subtitle 2 partly generic +## Subtitle 2 g blablabla generic blablabla generic {% if OS == windows %}blablabla windows blablabla windows with a [link](windows.md) -#### Subtitle 3 Windows specific +### Subtitle 3 w blablabla windows blablablabla windows {% else %}blablabla Linux macOS blablablabla Linux macOS with a [link](linuxmacos.md) -#### Subtitle 4 Linux and macOS specific +### Subtitle 4 l&m blablabla Linux macOS blablablabla Linux macOS {% endif %} blablabla generic with a [link](generic.md) -## Subtitle 5 generic +## Subtitle 5 g blablabla From 56543f03ddbba5df7477e78468c8a9e46e92f227 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 13:41:23 +0200 Subject: [PATCH 122/152] small fix for double title 
bug --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index a041160c855..1530eedf31c 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -839,7 +839,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or if len(jinja_text) != 0: # add first subtitle in front of section again - if options[SPLIT_ON_TITLES] or metadata[SUBTITLE].replace("-", " ") not in jinja_text[:len(metadata[SUBTITLE]) + 1]: + if options[SPLIT_ON_TITLES] or metadata[SUBTITLE] not in make_valid_title(jinja_text[:len(metadata[SUBTITLE]) + 1]): jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE].replace("-", " ") + "\n" + jinja_text else: jinja_text = "#" * metadata[TITLE_DEPTH] + " " + jinja_text From 52a3861bec953f687c6317a1e180f9c27124d304 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 13:44:13 +0200 Subject: [PATCH 123/152] added examples of output of the script when splitting on paragraphs with a min_paragraph_length of 683 --- .../generic/account/account_paragraph_1.txt | 13 +++++++ .../generic/account/account_paragraph_10.txt | 19 ++++++++++ .../account_paragraph_10_metadata.json | 11 ++++++ .../generic/account/account_paragraph_12.txt | 17 +++++++++ .../account_paragraph_12_metadata.json | 11 ++++++ .../account/account_paragraph_1_metadata.json | 14 ++++++++ .../generic/account/account_paragraph_2.txt | 6 ++++ .../account/account_paragraph_2_metadata.json | 16 +++++++++ .../generic/account/account_paragraph_3.txt | 11 ++++++ .../account/account_paragraph_3_metadata.json | 11 ++++++ .../generic/account/account_paragraph_8.txt | 13 +++++++ .../account/account_paragraph_8_metadata.json | 11 ++++++ .../connecting/connecting_paragraph_1.txt | 13 +++++++ .../connecting/connecting_paragraph_14.txt 
| 7 ++++ .../connecting_paragraph_14_metadata.json | 14 ++++++++ .../connecting/connecting_paragraph_15.txt | 12 +++++++ .../connecting_paragraph_15_metadata.json | 15 ++++++++ .../connecting_paragraph_1_metadata.json | 14 ++++++++ .../connecting/connecting_paragraph_2.txt | 14 ++++++++ .../connecting_paragraph_2_metadata.json | 11 ++++++ .../connecting/connecting_paragraph_3.txt | 12 +++++++ .../connecting_paragraph_3_metadata.json | 11 ++++++ .../connecting/connecting_paragraph_6.txt | 16 +++++++++ .../connecting_paragraph_6_metadata.json | 11 ++++++ .../connecting/connecting_paragraph_7.txt | 22 ++++++++++++ .../connecting_paragraph_7_metadata.json | 11 ++++++ .../connecting/connecting_paragraph_8.txt | 13 +++++++ .../connecting_paragraph_8_metadata.json | 14 ++++++++ .../connecting/connecting_paragraph_9.txt | 27 ++++++++++++++ .../connecting_paragraph_9_metadata.json | 11 ++++++ .../account/account_linux_paragraph_11.1.txt | 11 ++++++ ...account_linux_paragraph_11.1_metadata.json | 14 ++++++++ .../account/account_linux_paragraph_4.1.txt | 10 ++++++ .../account_linux_paragraph_4.1_metadata.json | 14 ++++++++ .../account/account_linux_paragraph_5.1.txt | 14 ++++++++ .../account_linux_paragraph_5.1_metadata.json | 11 ++++++ .../account/account_linux_paragraph_5.2.txt | 13 +++++++ .../account_linux_paragraph_5.2_metadata.json | 11 ++++++ .../account/account_linux_paragraph_5.3.txt | 17 +++++++++ .../account_linux_paragraph_5.3_metadata.json | 11 ++++++ .../account/account_linux_paragraph_5.4.txt | 18 ++++++++++ .../account_linux_paragraph_5.4_metadata.json | 11 ++++++ .../account/account_linux_paragraph_5.5.txt | 6 ++++ .../account_linux_paragraph_5.5_metadata.json | 11 ++++++ .../account/account_linux_paragraph_6.1.txt | 1 + .../account_linux_paragraph_6.1_metadata.json | 11 ++++++ .../account/account_linux_paragraph_7.1.txt | 14 ++++++++ .../account_linux_paragraph_7.1_metadata.json | 14 ++++++++ .../account/account_linux_paragraph_7.2.txt | 8 +++++ 
.../account_linux_paragraph_7.2_metadata.json | 11 ++++++ .../account/account_linux_paragraph_9.1.txt | 6 ++++ .../account_linux_paragraph_9.1_metadata.json | 11 ++++++ .../connecting_linux_paragraph_10.1.txt | 35 +++++++++++++++++++ ...necting_linux_paragraph_10.1_metadata.json | 11 ++++++ .../connecting_linux_paragraph_11.1.txt | 6 ++++ ...necting_linux_paragraph_11.1_metadata.json | 11 ++++++ .../connecting_linux_paragraph_12.1.txt | 12 +++++++ ...necting_linux_paragraph_12.1_metadata.json | 14 ++++++++ .../connecting_linux_paragraph_12.2.txt | 17 +++++++++ ...necting_linux_paragraph_12.2_metadata.json | 11 ++++++ .../connecting_linux_paragraph_12.3.txt | 22 ++++++++++++ ...necting_linux_paragraph_12.3_metadata.json | 11 ++++++ .../connecting_linux_paragraph_12.4.txt | 14 ++++++++ ...necting_linux_paragraph_12.4_metadata.json | 11 ++++++ .../connecting_linux_paragraph_12.5.txt | 14 ++++++++ ...necting_linux_paragraph_12.5_metadata.json | 11 ++++++ .../connecting_linux_paragraph_12.6.txt | 18 ++++++++++ ...necting_linux_paragraph_12.6_metadata.json | 15 ++++++++ .../connecting_linux_paragraph_13.1.txt | 10 ++++++ ...necting_linux_paragraph_13.1_metadata.json | 11 ++++++ .../connecting_linux_paragraph_4.1.txt | 7 ++++ ...nnecting_linux_paragraph_4.1_metadata.json | 15 ++++++++ .../connecting_linux_paragraph_5.1.txt | 12 +++++++ ...nnecting_linux_paragraph_5.1_metadata.json | 14 ++++++++ .../connecting_linux_paragraph_5.2.txt | 4 +++ ...nnecting_linux_paragraph_5.2_metadata.json | 11 ++++++ .../account/account_macos_paragraph_11.1.txt | 11 ++++++ ...account_macos_paragraph_11.1_metadata.json | 14 ++++++++ .../account/account_macos_paragraph_4.1.txt | 10 ++++++ .../account_macos_paragraph_4.1_metadata.json | 14 ++++++++ .../account/account_macos_paragraph_5.1.txt | 12 +++++++ .../account_macos_paragraph_5.1_metadata.json | 11 ++++++ .../account/account_macos_paragraph_5.2.txt | 13 +++++++ .../account_macos_paragraph_5.2_metadata.json | 11 ++++++ 
.../account/account_macos_paragraph_5.3.txt | 20 +++++++++++ .../account_macos_paragraph_5.3_metadata.json | 11 ++++++ .../account/account_macos_paragraph_5.4.txt | 18 ++++++++++ .../account_macos_paragraph_5.4_metadata.json | 11 ++++++ .../account/account_macos_paragraph_5.5.txt | 6 ++++ .../account_macos_paragraph_5.5_metadata.json | 11 ++++++ .../account/account_macos_paragraph_6.1.txt | 1 + .../account_macos_paragraph_6.1_metadata.json | 11 ++++++ .../account/account_macos_paragraph_7.1.txt | 14 ++++++++ .../account_macos_paragraph_7.1_metadata.json | 14 ++++++++ .../account/account_macos_paragraph_7.2.txt | 7 ++++ .../account_macos_paragraph_7.2_metadata.json | 11 ++++++ .../account/account_macos_paragraph_9.1.txt | 11 ++++++ .../account_macos_paragraph_9.1_metadata.json | 11 ++++++ .../connecting_macos_paragraph_10.1.txt | 35 +++++++++++++++++++ ...necting_macos_paragraph_10.1_metadata.json | 11 ++++++ .../connecting_macos_paragraph_11.1.txt | 6 ++++ ...necting_macos_paragraph_11.1_metadata.json | 11 ++++++ .../connecting_macos_paragraph_12.1.txt | 12 +++++++ ...necting_macos_paragraph_12.1_metadata.json | 14 ++++++++ .../connecting_macos_paragraph_12.2.txt | 17 +++++++++ ...necting_macos_paragraph_12.2_metadata.json | 11 ++++++ .../connecting_macos_paragraph_12.3.txt | 22 ++++++++++++ ...necting_macos_paragraph_12.3_metadata.json | 11 ++++++ .../connecting_macos_paragraph_12.4.txt | 14 ++++++++ ...necting_macos_paragraph_12.4_metadata.json | 11 ++++++ .../connecting_macos_paragraph_12.5.txt | 14 ++++++++ ...necting_macos_paragraph_12.5_metadata.json | 11 ++++++ .../connecting_macos_paragraph_12.6.txt | 18 ++++++++++ ...necting_macos_paragraph_12.6_metadata.json | 15 ++++++++ .../connecting_macos_paragraph_13.1.txt | 12 +++++++ ...necting_macos_paragraph_13.1_metadata.json | 11 ++++++ .../connecting_macos_paragraph_13.2.txt | 6 ++++ ...necting_macos_paragraph_13.2_metadata.json | 11 ++++++ .../connecting_macos_paragraph_4.1.txt | 7 ++++ 
...nnecting_macos_paragraph_4.1_metadata.json | 15 ++++++++ .../connecting_macos_paragraph_5.1.txt | 10 ++++++ ...nnecting_macos_paragraph_5.1_metadata.json | 14 ++++++++ .../connecting_macos_paragraph_5.2.txt | 7 ++++ ...nnecting_macos_paragraph_5.2_metadata.json | 11 ++++++ .../account_windows_paragraph_11.1.txt | 11 ++++++ ...count_windows_paragraph_11.1_metadata.json | 14 ++++++++ .../account/account_windows_paragraph_4.1.txt | 14 ++++++++ ...ccount_windows_paragraph_4.1_metadata.json | 14 ++++++++ .../account/account_windows_paragraph_4.2.txt | 13 +++++++ ...ccount_windows_paragraph_4.2_metadata.json | 11 ++++++ .../account/account_windows_paragraph_4.3.txt | 13 +++++++ ...ccount_windows_paragraph_4.3_metadata.json | 15 ++++++++ .../account/account_windows_paragraph_4.4.txt | 17 +++++++++ ...ccount_windows_paragraph_4.4_metadata.json | 11 ++++++ .../account/account_windows_paragraph_4.5.txt | 7 ++++ ...ccount_windows_paragraph_4.5_metadata.json | 11 ++++++ .../account/account_windows_paragraph_6.1.txt | 13 +++++++ ...ccount_windows_paragraph_6.1_metadata.json | 11 ++++++ .../account/account_windows_paragraph_6.2.txt | 11 ++++++ ...ccount_windows_paragraph_6.2_metadata.json | 15 ++++++++ .../account/account_windows_paragraph_6.3.txt | 5 +++ ...ccount_windows_paragraph_6.3_metadata.json | 11 ++++++ .../account/account_windows_paragraph_9.1.txt | 7 ++++ ...ccount_windows_paragraph_9.1_metadata.json | 11 ++++++ .../connecting_windows_paragraph_10.1.txt | 5 +++ ...cting_windows_paragraph_10.1_metadata.json | 11 ++++++ .../connecting_windows_paragraph_11.1.txt | 11 ++++++ ...cting_windows_paragraph_11.1_metadata.json | 11 ++++++ .../connecting_windows_paragraph_11.2.txt | 19 ++++++++++ ...cting_windows_paragraph_11.2_metadata.json | 11 ++++++ .../connecting_windows_paragraph_11.3.txt | 7 ++++ ...cting_windows_paragraph_11.3_metadata.json | 11 ++++++ .../connecting_windows_paragraph_4.1.txt | 11 ++++++ ...ecting_windows_paragraph_4.1_metadata.json | 15 ++++++++ 
.../connecting_windows_paragraph_4.2.txt | 13 +++++++ ...ecting_windows_paragraph_4.2_metadata.json | 11 ++++++ .../connecting_windows_paragraph_4.3.txt | 13 +++++++ ...ecting_windows_paragraph_4.3_metadata.json | 14 ++++++++ .../connecting_windows_paragraph_4.4.txt | 11 ++++++ ...ecting_windows_paragraph_4.4_metadata.json | 11 ++++++ 160 files changed, 1976 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt 
create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt 
create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt create mode 
100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt new file mode 100644 index 00000000000..1b79fd22391 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt @@ -0,0 +1,13 @@ +Getting an HPC Account +Getting ready to request an account +All users of AUGent can request +an +account on the HPC, which is part of the Flemish Supercomputing Centre (VSC). +See HPC policies for more information on who is entitled to an account. +The VSC, abbreviation of Flemish Supercomputer Centre, is a virtual +supercomputer centre. It is a partnership between the five Flemish +associations: the Association KU Leuven, Ghent University Association, +Brussels University Association, Antwerp University Association and the +University Colleges-Limburg. The VSC is funded by the Flemish +Government. 
+There are two methods for connecting to HPC-UGent infrastructure: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt new file mode 100644 index 00000000000..371dd9db52b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt @@ -0,0 +1,19 @@ +Welcome e-mail +Within one day, you should receive a Welcome e-mail with your VSC +account details. +Dear (Username), +Your VSC-account has been approved by an administrator. +Your vsc-username is vsc40000 +Your account should be fully active within one hour. +To check or update your account information please visit +https://account.vscentrum.be/ +For further info please visit https://www.vscentrum.be/user-portal +Kind regards, +-- The VSC administrators + +Now, you can start using the HPC. You can always look up your VSC id later +by visiting . +Adding multiple SSH public keys (optional) +In case you are connecting from different computers to the login nodes, +it is advised to use separate SSH public keys to do so. You should +follow these steps. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json new file mode 100644 index 00000000000..4b5b5202d1c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Adding-multiple-SSH-public-keys-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "", + "previous_title": "account_paragraph_9", + "next_title": "account_paragraph_11", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/account/#adding-multiple-ssh-public-keys-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt new file mode 100644 index 00000000000..6ee6880838e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt @@ -0,0 +1,17 @@ +5. Take into account that it will take some time before the new SSH + public key is active in your account on the system; waiting for + 15-30 minutes should be sufficient. +Computation Workflow on the HPC +A typical Computation workflow will be: +1. Connect to the HPC +2. Transfer your files to the HPC +3. Compile your code and test it +4. Create a job script +5. Submit your job +6. Wait while + 1. your job gets into the queue + 2. your job gets executed + 3. your job finishes +7. Move your results +We'll take you through the different tasks one by one in the following +chapters. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json new file mode 100644 index 00000000000..a5df035df49 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Computation-Workflow-on-the-HPC", + "title_depth": 2, + "directory": "account", + "parent_title": "", + "previous_title": "account_paragraph_11", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/account/#computation-workflow-on-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json new file mode 100644 index 00000000000..726ce9f94fa --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "Getting-ready-to-request-an-account", + "title_depth": 2, + "directory": "account", + "links": { + "0": "../sites/hpc_policies" + }, + "parent_title": "", + "previous_title": null, + "next_title": "account_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/account/#getting-ready-to-request-an-account" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt new file mode 100644 index 00000000000..6ecd65e2184 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt @@ -0,0 +1,6 @@ +- Using a terminal to connect via SSH. 
+- Using the web portal +The web portal offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). +If you would like to use a terminal with SSH, as this gives you more flexibility, continue reading. +However, if you prefer to use the web portal, you can skip ahead to the following section: Applying for the account. +Once you have successfully obtained an account, you can then delve into the details of utilizing the HPC-UGent web portal by reading Using the HPC-UGent web portal. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json new file mode 100644 index 00000000000..257f886c6e0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json @@ -0,0 +1,16 @@ +{ + "main_title": "account", + "subtitle": "Getting-ready-to-request-an-account", + "title_depth": 2, + "directory": "account", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal", + "1": "https://docs.hpc.ugent.be/account/#applying-for-the-account", + "2": "https://docs.hpc.ugent.be/web_portal" + }, + "parent_title": "", + "previous_title": "account_paragraph_1", + "next_title": "account_paragraph_3", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/account/#getting-ready-to-request-an-account" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt new file mode 100644 index 00000000000..9632ef1f5af --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt @@ -0,0 +1,11 @@ +The HPC-UGent infrastructure clusters use public/private key pairs for user authentication +(rather than 
passwords). Technically, the private key is stored on your +local computer and always stays there; the public key is stored on the HPC. +Access to the HPC is granted to anyone who can prove to have access to the +corresponding private key on his local computer. +How do SSH keys work? +- an SSH public/private key pair can be seen as a lock and a key +- the SSH public key is equivalent with a lock: you give it to the + VSC and they put it on the door that gives access to your account. +- the SSH private key is like a physical key: you don't hand it out + to other people. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json new file mode 100644 index 00000000000..b94f233779b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "How-do-SSH-keys-work", + "title_depth": 3, + "directory": "account", + "parent_title": "", + "previous_title": "account_paragraph_2", + "next_title": "account_paragraph_4", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/account/#how-do-ssh-keys-work" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt new file mode 100644 index 00000000000..125b566419a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt @@ -0,0 +1,13 @@ +Applying for the account +Visit +You will be redirected to our WAYF (Where Are You From) service where +you have to select your "Home Organisation". +Select "UGent" in the dropdown box and optionally select "Save my preference" +and "permanently". +Click "Confirm" +You will now be taken to the authentication page of your institute. 
+After you log in using your UGent login and password, you will be asked to +upload the file that contains your public key, i.e., the file +"id_rsa.pub" which you have generated earlier. Make sure that your +public key is actually accepted for upload, because if it is in a wrong +format, wrong type or too short, then it will be refused. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json new file mode 100644 index 00000000000..6d186b6ff46 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Applying-for-the-account", + "title_depth": 2, + "directory": "account", + "parent_title": "", + "previous_title": "account_paragraph_7", + "next_title": "account_paragraph_9", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/account/#applying-for-the-account" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt new file mode 100644 index 00000000000..b144712c9df --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt @@ -0,0 +1,13 @@ +Connecting to the HPC infrastructure +Before you can really start using the HPC clusters, there are several things +you need to do or know: +1. You need to log on to the cluster using an SSH client to one of + the login nodes or by using the HPC web portal. + This will give you command-line access. + A standard web browser like Firefox or Chrome for the web portal will suffice. +2. Before you can do some work, you'll have to transfer the files + that you need from your desktop computer to the cluster. 
At the end + of a job, you might want to transfer some files back. +3. Optionally, if you wish to use programs with a **graphical user + interface**, you will need an X-server on your client system and log + in to the login nodes with X-forwarding enabled. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt new file mode 100644 index 00000000000..df00d4ed2a4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt @@ -0,0 +1,7 @@ +Fast file transfer for large datasets +See the section on rsync in chapter 5 of the Linux intro manual. +Changing login nodes +It can be useful to have control over which login node you are on. However, when you connect to the HPC (High-Performance Computing) system, you are directed to a random login node, which might not be the one where you already have an active session. To address this, there is a way to manually switch your active login node. +For instance, if you want to switch to the login node named gligar07.gastly.os, you can use the following command while you are connected to the gligar08.gastly.os login node on the HPC: +ssh gligar07.gastly.os +This is also possible the other way around. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json new file mode 100644 index 00000000000..0543efa4083 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "connecting", + "subtitle": "Changing-login-nodes", + "title_depth": 2, + "directory": "connecting", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/../linux-tutorial/uploading_files/#copying-faster-with-rsync" + }, + "parent_title": "", + "previous_title": "connecting_paragraph_13", + "next_title": "connecting_paragraph_15", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt new file mode 100644 index 00000000000..b2197618647 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt @@ -0,0 +1,12 @@ +If you want to find out which login host you are connected to, you can use the hostname command. +$ hostname +gligar07.gastly.os +$ ssh gligar08.gastly.os +$ hostname +gligar08.gastly.os + +Rather than always starting a new session on the HPC, you can also use a terminal multiplexer like screen or tmux. +These can make sessions that 'survive' across disconnects. 
+You can find more information on how to use these tools here (or on other online sources): +- screen +- tmux \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json new file mode 100644 index 00000000000..d23146ed79f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "Changing-login-nodes", + "title_depth": 2, + "directory": "connecting", + "links": { + "0": "https://www.howtogeek.com/662422/how-to-use-linuxs-screen-command/", + "1": "https://www.howtogeek.com/671422/how-to-use-tmux-on-linux-and-why-its-better-than-screen/" + }, + "parent_title": "", + "previous_title": "connecting_paragraph_14", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json new file mode 100644 index 00000000000..ef0bc5473b0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "connecting", + "subtitle": "Connecting-to-the-HPC-infrastructure", + "title_depth": 1, + "directory": "connecting", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal" + }, + "parent_title": "", + "previous_title": null, + "next_title": "connecting_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#connecting-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt new file mode 100644 index 00000000000..4c1d879b954 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt @@ -0,0 +1,14 @@ +4. Often several versions of software packages and libraries are + installed, so you need to select the ones you need. To manage + different versions efficiently, the VSC clusters use so-called + modules, so you will need to select and load the modules that + you need. +Connection restrictions +Since March 20th 2020, restrictions are in place that limit from where +you can connect to the VSC HPC infrastructure, in response to security +incidents involving several European HPC centres. +VSC login nodes are only directly accessible from within university +networks, and from (most) Belgian commercial internet providers. +All other IP domains are blocked by default. 
If you are connecting from +an IP address that is not allowed direct access, you have the following +options to get access to VSC login nodes: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json new file mode 100644 index 00000000000..39ee53fcf0b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Connection-restrictions", + "title_depth": 2, + "directory": "connecting", + "parent_title": "", + "previous_title": "connecting_paragraph_1", + "next_title": "connecting_paragraph_3", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#connection-restrictions" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt new file mode 100644 index 00000000000..668a1e6df57 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt @@ -0,0 +1,12 @@ +- Use a VPN connection to connect to the UGent network (recommended). +- Whitelist your IP address automatically by accessing + and log in with your UGent account. + - While this web connection is active new SSH sessions can be + started. + - Active SSH sessions will remain active even when this web page + is closed. +- Contact your HPC support team (via hpc@ugent.be) and ask them to whitelist your + IP range (e.g., for industry access, automated processes). 
+Trying to establish an SSH connection from an IP address that does not +adhere to these restrictions will result in an immediate failure to +connect, with an error message like: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json new file mode 100644 index 00000000000..4dc75d7dcf3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Connection-restrictions", + "title_depth": 2, + "directory": "connecting", + "parent_title": "", + "previous_title": "connecting_paragraph_2", + "next_title": "connecting_paragraph_4", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#connection-restrictions" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt new file mode 100644 index 00000000000..472991adada --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt @@ -0,0 +1,16 @@ +Congratulations, you're on the HPC infrastructure now! +To find out where you have landed you can print the current working directory: +$ pwd +/user/home/gent/vsc400/vsc40000 + +Your new private home directory is "/user/home/gent/vsc400/vsc40000". Here you can create your own +subdirectory structure, copy and prepare your applications, compile and +test them and submit your jobs on the HPC. +$ cd /apps/gent/tutorials +$ ls +Intro-HPC/ + +This directory currently contains all training material for the Introduction to the HPC. More +relevant training material to work with the HPC can always be added later in +this directory. 
+You can now explore the content of this directory with the "ls --l" (lists long) and the "cd" (change directory) commands: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json new file mode 100644 index 00000000000..1c7ae8ed267 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "parent_title": "", + "previous_title": "connecting_paragraph_5", + "next_title": "connecting_paragraph_7", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt new file mode 100644 index 00000000000..35996afe4da --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt @@ -0,0 +1,22 @@ +As we are interested in the use of the HPC, move further to Intro-HPC and explore the +contents up to 2 levels deep: +$ cd Intro-HPC +$ tree -L 2 +. +'-- examples + |-- Compiling-and-testing-your-software-on-the-HPC + |-- Fine-tuning-Job-Specifications + |-- Multi-core-jobs-Parallel-Computing + |-- Multi-job-submission + |-- Program-examples + |-- Running-batch-jobs + |-- Running-jobs-with-input + |-- Running-jobs-with-input-output-data + |-- example.pbs + '-- example.sh +9 directories, 5 files + +This directory contains: +1. This HPC Tutorial (in either a Mac, Linux or Windows version). +2. 
An examples subdirectory, containing all the examples that you need in this + Tutorial, as well as examples that might be useful for your specific applications. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json new file mode 100644 index 00000000000..709753e4dc4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "parent_title": "", + "previous_title": "connecting_paragraph_6", + "next_title": "connecting_paragraph_8", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt new file mode 100644 index 00000000000..096c74c1372 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt @@ -0,0 +1,13 @@ +$ cd examples + + tip + Typing cd ex followed by tab (the Tab-key) will generate the cd examples + command. Command-line completion (also tab completion) is a common feature of the bash command + line interpreter, in which the program automatically fills in partially + typed commands. + tip + For more exhaustive tutorials about Linux usage, see Appendix Useful Linux Commands +The first action is to copy the contents of the HPC examples directory to +your home directory, so that you have your own personal copy and that +you can start using the examples. 
The "-r" option of the copy command +will also copy the contents of the sub-directories "recursively". diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json new file mode 100644 index 00000000000..0241e0bd6b9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "links": { + "0": "../useful_linux_commands" + }, + "parent_title": "", + "previous_title": "connecting_paragraph_7", + "next_title": "connecting_paragraph_9", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt new file mode 100644 index 00000000000..5a634e6bddc --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt @@ -0,0 +1,27 @@ +$ cp -r /apps/gent/tutorials/Intro-HPC/examples ~/ + +You can exit the connection at anytime by entering: +$ exit +logout +Connection to login.hpc.ugent.be closed. + + tip "tip: Setting your Language right" + You may encounter a warning message similar to the following one during connecting: + perl: warning: Setting locale failed. + perl: warning: Please check that your locale settings: + LANGUAGE = (unset), + LC_ALL = (unset), + LC_CTYPE = "UTF-8", + LANG = (unset) + are supported and installed on your system. + perl: warning: Falling back to the standard locale ("C"). + or any other error message complaining about the locale. 
+ This means that the correct "locale" has not yet been properly specified on your local machine. Try: + LANG= + LC_COLLATE="C" + LC_CTYPE="UTF-8" + LC_MESSAGES="C" + LC_MONETARY="C" + LC_NUMERIC="C" + LC_TIME="C" + LC_ALL= diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json new file mode 100644 index 00000000000..40b04f24e9f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "parent_title": "", + "previous_title": "connecting_paragraph_8", + "next_title": "connecting_paragraph_10", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt new file mode 100644 index 00000000000..3a46897bdee --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt @@ -0,0 +1,11 @@ +Adding multiple SSH public keys (optional) +1. Create a new public/private SSH key pair from the new computer. + Repeat the process described in + section Generate a public/private key pair with OpenSSH. +2. Go to +3. Upload the new SSH public key using the Add public key section. Make sure that your + public key is actually saved, because a public key will be refused + if it is too short, wrong type, or in a wrong format. +4. (optional) If you lost your key, you can delete the old key on the + same page.
You should keep at least one valid public SSH key in your + account. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json new file mode 100644 index 00000000000..72b9f92061c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "Adding-multiple-SSH-public-keys-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Applying-for-the-account", + "links": { + "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair-with-openssh" + }, + "previous_title": "account_paragraph_10", + "next_title": "account_paragraph_12", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#adding-multiple-ssh-public-keys-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt new file mode 100644 index 00000000000..1395e2ee7bd --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt @@ -0,0 +1,10 @@ +How do SSH keys work +- anyone who has the key (and the optional password) can unlock the + door and log in to the account. +- the door to your VSC account is special: it can have multiple + locks (SSH public keys) attached to it, and you only need to open + one lock with the corresponding key (SSH private key) to open + the door (log in to the account). 
+Since all VSC clusters use Linux as their main operating system, you +will need to get acquainted with using the command-line interface and +using the terminal (see tutorial). \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json new file mode 100644 index 00000000000..52e1569a8a7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "How-do-SSH-keys-work", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "../../linux-tutorial" + }, + "previous_title": "account_paragraph_3", + "next_title": "account_paragraph_5", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#how-do-ssh-keys-work" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt new file mode 100644 index 00000000000..caaaea5ee91 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt @@ -0,0 +1,14 @@ +How do SSH keys work +Launch a terminal from your desktop's application menu and you will see +the bash shell. There are other shells, but most Linux distributions use +bash by default. +Test OpenSSH +Secure Shell (ssh) is a cryptographic network protocol for secure data +communication, remote command-line login, remote command execution, and +other secure network services between two networked computers. 
In short, +ssh provides a secure connection between 2 computers via insecure +channels (Network, Internet, telephone lines, ...). +"Secure" means that: +1. the User is authenticated to the System; and +2. the System is authenticated to the User; and +3. all data is encrypted during transfer. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json new file mode 100644 index 00000000000..4636f13a4b4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Test-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_paragraph_4", + "next_title": "account_linux_paragraph_5.2", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#test-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt new file mode 100644 index 00000000000..a166dd14503 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt @@ -0,0 +1,13 @@ +OpenSSH is a FREE implementation of the SSH connectivity protocol. Linux comes +with its own implementation of OpenSSH, so you don't need to install any +third-party software to use it. Just open a terminal window and jump in! +On all popular Linux distributions, the OpenSSH software is readily +available, and most often installed by default.
You can check whether +the OpenSSH software is installed by opening a terminal and typing: +$ ssh -V +OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017 +To access the clusters and transfer your files, you will use the +following commands: +1. ssh-keygen: to generate the SSH key pair (public + private key); +2. ssh: to open a shell on a remote machine; +3. sftp: a secure equivalent of ftp; diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json new file mode 100644 index 00000000000..ca9c4c7dc1d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Test-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_linux_paragraph_5.1", + "next_title": "account_linux_paragraph_5.3", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#test-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt new file mode 100644 index 00000000000..2e8fe9e3a24 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt @@ -0,0 +1,17 @@ +4. scp: a secure equivalent of the remote copy command rcp. +Generate a public/private key pair with OpenSSH +A key pair might already be present in the default location inside your +home directory. 
Therefore, we first check if a key is available with the +"list short" ("ls") command: +$ ls ~/.ssh +If a key-pair is already available, you would normally get: +authorized_keys id_rsa id_rsa.pub known_hosts +Otherwise, the command will show: +ls: .ssh: No such file or directory +You can recognise a public/private key pair when a pair of files has the +same name except for the extension ".pub" added to one of them. In this +particular case, the private key is "id_rsa" and public key is +"id_rsa.pub". You may have multiple keys (not necessarily in the +directory "~/.ssh") if you or your operating system requires this. Be +aware that your existing key pair might be too short, or not the right +type. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json new file mode 100644 index 00000000000..d902f6a0838 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_linux_paragraph_5.2", + "next_title": "account_linux_paragraph_5.4", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt new file mode 100644 index 00000000000..3cde4395d81 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt @@ 
-0,0 +1,18 @@ +You will need to generate a new key pair, when: +1. you don't have a key pair yet +2. you forgot the passphrase protecting your private key +3. your private key was compromised +4. your key pair is too short or not the right type +For extra security, the private key itself can be encrypted using a +"passphrase", to prevent anyone from using your private key even when +they manage to copy it. You have to "unlock" the private key by typing +the passphrase. Be sure to never give away your private key, it is +private and should stay private. You should not even copy it to one of +your other machines, instead, you should create a new public/private key +pair for each machine. +$ ssh-keygen -t rsa -b 4096 +Generating public/private rsa key pair. Enter file in which to save the +key (/home/user/.ssh/id_rsa): Enter passphrase (empty for no +passphrase): Enter same passphrase again: Your identification has been +saved in /home/user/.ssh/id_rsa. Your public key has been saved in +/home/user/.ssh/id_rsa.pub. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json new file mode 100644 index 00000000000..1edae26d97b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_linux_paragraph_5.3", + "next_title": "account_linux_paragraph_5.5", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt new file mode 100644 index 00000000000..78c142e82e0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt @@ -0,0 +1,6 @@ +This will ask you for a file name to store the private and public key, +and a passphrase to protect your private key. It needs to be emphasised +that you really should choose the passphrase wisely! The system will ask +you for it every time you want to use the private key that is every time +you want to access the cluster or transfer your files. +Without your key pair, you won't be able to apply for a personal VSC account. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json new file mode 100644 index 00000000000..29affc0335e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_linux_paragraph_5.4", + "next_title": "account_paragraph_6", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt new file mode 100644 index 00000000000..c3b395b5296 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt @@ -0,0 +1 @@ +Using an SSH agent (optional) \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json new file mode 100644 index 00000000000..acf12bc0a7d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title":
"account_paragraph_5", + "next_title": "account_paragraph_7", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt new file mode 100644 index 00000000000..e3ef2176f09 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt @@ -0,0 +1,14 @@ +Using an SSH agent (optional) +Most recent Unix derivatives include by default an SSH agent ("gnome-keyring-daemon" in most cases) +to keep and manage the user SSH keys. If you use one of these derivatives you must include the new keys into +the SSH manager keyring to be able to connect to the HPC cluster. If +not, SSH client will display an error message (see Connecting) similar to this: +Agent admitted failure to sign using the key. +Permission denied (publickey,gssapi-keyex,gssapi-with-mic). +This could be fixed using the ssh-add command. You can include the new +private keys' identities in your keyring with: +$ ssh-add + tip + Without extra options ssh-add adds any key located at $HOME/.ssh + directory, but you can specify the private key location path as + argument, as example: ssh-add /path/to/my/id_rsa. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json new file mode 100644 index 00000000000..b6b1e052345 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "../connecting" + }, + "previous_title": "account_paragraph_6", + "next_title": "account_linux_paragraph_7.2", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt new file mode 100644 index 00000000000..93019fa1a6a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt @@ -0,0 +1,8 @@ +Check that your key is available from the keyring with: +$ ssh-add -l +After these changes the key agent will keep your SSH key to connect to +the clusters as usual. + tip + You should execute ssh-add command again if you generate a new SSH + key. +Visit for more information. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json new file mode 100644 index 00000000000..35466be5b56 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_linux_paragraph_7.1", + "next_title": "account_paragraph_8", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt new file mode 100644 index 00000000000..a9059b224bf --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt @@ -0,0 +1,6 @@ +Applying for the account +This file has been stored in the directory "~/.ssh/". +After you have uploaded your public key you will receive an e-mail with +a link to confirm your e-mail address. After confirming your e-mail +address the VSC staff will review and if applicable approve your +account. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json new file mode 100644 index 00000000000..21988388723 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Applying-for-the-account", + "title_depth": 2, + "directory": "account", + "parent_title": "account", + "previous_title": "account_paragraph_8", + "next_title": "account_paragraph_10", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#applying-for-the-account" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt new file mode 100644 index 00000000000..3e588c709d4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt @@ -0,0 +1,35 @@ +First Time connection to the HPC infrastructure + A locale is a set of parameters that defines the user's language, country and + any special variant preferences that the user wants to see in their user + interface. Usually a locale identifier consists of at least a language + identifier and a region identifier. + Open the .bashrc on your local machine with your favourite editor and + add the following lines: + + $ nano ~/.bashrc + ... + export LANGUAGE="en_US.UTF-8" + export LC_ALL="en_US.UTF-8" + export LC_CTYPE="en_US.UTF-8" + export LANG="en_US.UTF-8" + ... + + + tip "tip: vi" + To start entering text in vi: move to the place you want to start + entering text with the arrow keys and type "i" to switch to insert mode. 
You can easily exit vi by entering: ""ESC" :wq" + To exit vi without saving your changes, enter ""ESC":q!" + + + or alternatively (if you are not comfortable with the Linux editors), + again on your local machine: + + $ echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile + $ echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile + $ echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile + $ echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile + + + You can now log out, open a new terminal/shell on your local machine and + reconnect to the login node, and you should not get these warnings anymore. + \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json new file mode 100644 index 00000000000..364c81834cf --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "parent_title": "Connecting-to-the-HPC-infrastructure", + "previous_title": "connecting_paragraph_9", + "next_title": "connecting_paragraph_11", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt new file mode 100644 index 00000000000..d872c89a0f8 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt @@ -0,0 +1,6 @@ +Transfer Files to/from the 
HPC +Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. +The preferred way to transfer files is by using an scp or sftp via the +secure OpenSSH protocol. Linux ships with an implementation of OpenSSH, so you +don't need to install any third-party software to use it. Just open a +terminal window and jump in! diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json new file mode 100644 index 00000000000..420f73742f5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Transfer-Files-tofrom-the-HPC", + "title_depth": 2, + "directory": "connecting", + "parent_title": "Connecting-to-the-HPC-infrastructure", + "previous_title": "connecting_paragraph_10", + "next_title": "connecting_paragraph_12", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#transfer-files-tofrom-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt new file mode 100644 index 00000000000..8d0031fcca9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt @@ -0,0 +1,12 @@ +Transfer Files to/from the HPC +Using scp +Secure copy or SCP is a tool (command) for securely transferring files between a local +host (= your computer) and a remote host (the HPC). It is based on the +Secure Shell (SSH) protocol.
The scp command is the equivalent of the cp (i.e., +copy) command, but can copy files to or from remote machines. +It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if +you have symlinks to them in your home directory. See +the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux + for how to do this. +Open an additional terminal window and check that you're working on your +local machine. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json new file mode 100644 index 00000000000..19eba778d90 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" + }, + "previous_title": "connecting_paragraph_11", + "next_title": "connecting_linux_paragraph_12.2", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt new file mode 100644 index 00000000000..f1da0677a67 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt @@ -0,0 +1,17 @@ +$ hostname + +If you're still using the terminal that is connected to the HPC, close the +connection by typing "exit" 
in the terminal window. +For example, we will copy the (local) file "localfile.txt" to your +home directory on the HPC cluster. We first generate a small dummy +"localfile.txt", which contains the word "Hello". Use your own VSC +account, which is something like "vsc40000". Don't forget the colon (:) at the +end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your +local filesystem. You can even specify where to save the file on the +remote filesystem by putting a path after the colon. +$ echo "Hello" > localfile.txt +$ ls -l +... +-rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt +$ scp localfile.txt vsc40000@login.hpc.ugent.be: +localfile.txt 100% 6 0.0KB/s 00:00 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json new file mode 100644 index 00000000000..0b3a3418c55 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_linux_paragraph_12.1", + "next_title": "connecting_linux_paragraph_12.3", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt new file mode 100644 index 00000000000..9585900e356 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt @@ -0,0 +1,22 @@ 
+Connect to the HPC via another terminal, print the working directory (to +make sure you're in the home directory) and check whether the file has +arrived: +$ pwd +/user/home/gent/vsc400/vsc40000 +$ ls -l +total 1536 +drwxrwxr-x 2 +drwxrwxr-x 2 +drwxrwxr-x 10 +-rw-r--r-- 1 +$ cat localfile.txt +Hello +The scp command can also be used to copy files from the cluster to your +local machine. Let us copy the remote file "intro-HPC--Gent.pdf" from your "docs" +subdirectory on the cluster to your local computer. +First, we will confirm that the file is indeed in the "docs" +subdirectory. In the terminal on the login node, enter: +$ cd ~/docs +$ ls -l +total 1536 +-rw-r--r-- 1 vsc40000 Sep 11 09:53 intro-HPC--Gent.pdf diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json new file mode 100644 index 00000000000..5624749ede8 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_linux_paragraph_12.2", + "next_title": "connecting_linux_paragraph_12.4", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt new file mode 100644 index 00000000000..2664953ed0c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt @@ -0,0 +1,14 
@@ +Now we will copy the file to the local machine. On the terminal on your +own local computer, enter: +$ scp vsc40000@login.hpc.ugent.be:./docs/intro-HPC--Gent.pdf . +intro-HPC--Gent.pdf 100% 725KB 724.6KB/s 00:01 +$ ls -l +total 899 +-rw-r--r-- 1 user staff 741995 Sep 18 09:53 +-rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt +The file has been copied from the HPC to your local computer. +It's also possible to copy entire directories (and their contents) with +the -r flag. For example, if we want to copy the local directory +dataset to $VSC_SCRATCH, we can use the following command (assuming +you've created the scratch symlink): +$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json new file mode 100644 index 00000000000..5a401911cab --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_linux_paragraph_12.3", + "next_title": "connecting_linux_paragraph_12.5", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt new file mode 100644 index 00000000000..51d39b548c3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt @@ -0,0 +1,14 @@ +If you don't use 
the -r option to copy a directory, you will run into +the following error: +$ scp dataset vsc40000@login.hpc.ugent.be:scratch +dataset: not a regular file +Using sftp +The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file +transfer and file management functionalities over any reliable data +stream. It was designed as an extension of the Secure Shell protocol +(SSH) version 2.0. This protocol assumes that it is run over a secure +channel, such as SSH, that the server has already authenticated the +client, and that the identity of the client user is available to the +protocol. +The sftp is an equivalent of the ftp command, with the difference that +it uses the secure ssh protocol to connect to the clusters. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json new file mode 100644 index 00000000000..a479f66e7e0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-sftp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_linux_paragraph_12.4", + "next_title": "connecting_linux_paragraph_12.6", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt new file mode 100644 index 00000000000..4ae257101f1 --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt @@ -0,0 +1,18 @@ +One easy way of starting an sftp session is +$ sftp vsc40000@login.hpc.ugent.be +Typical and popular commands inside an sftp session are: +| | | +|:--------------------------|:-------------------------------------------------------------------------------------| +| cd ~/examples/fibo | Move to the examples/fibo subdirectory on the remote machine (i.e., the HPC) | +| ls | Get a list of the files in the current directory on the HPC. | +| get fibo.py | Copy the file "fibo.py" from the HPC | +| get tutorial/HPC.pdf | Copy the file "HPC.pdf" from the HPC, which is in the "tutorial" subdirectory. | +| lcd test | Move to the "test" subdirectory on your local machine. | +| lcd .. | Move up one level in the local directory. | +| lls | Get local directory listing. | +| put test.py | Copy the local file test.py to the HPC. | +| put test1.py test2.py | Copy the local file test1.py to the HPC and rename it to test2.py. | +| bye | Quit the sftp session | +| **mget *.cc** | Copy all the remote files with extension ".cc" to the local directory. | +| **mput *.h** | Copy all the local files with extension ".h" to the HPC. 
| +| | | diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json new file mode 100644 index 00000000000..9c744fd5133 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "Using-sftp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "", + "1": "" + }, + "previous_title": "connecting_linux_paragraph_12.5", + "next_title": "connecting_linux_paragraph_12.7", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt new file mode 100644 index 00000000000..a0496edfb14 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt @@ -0,0 +1,10 @@ +Transfer Files to/from the HPC +Using a GUI +If you prefer a GUI to transfer files back and forth to the HPC, you can +use your file browser. Open your file browser and press +++"Ctrl"+"l"++ +This should open up an address bar where you can enter a URL. +Alternatively, look for the "connect to server" option in your file +browser's menu. +Enter: sftp://vsc40000@login.hpc.ugent.be/ and press enter. +You should now be able to browse files on the HPC in your file browser. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json new file mode 100644 index 00000000000..d634a356654 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-a-GUI", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_paragraph_12", + "next_title": "connecting_paragraph_14", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-a-gui" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt new file mode 100644 index 00000000000..773d03f0689 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt @@ -0,0 +1,7 @@ +First Time connection to the HPC infrastructure +ssh_exchange_identification: read: Connection reset by peer +First Time connection to the HPC infrastructure +The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. +If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. +If you have any issues connecting to the HPC after you've followed these +steps, see Issues connecting to login node to troubleshoot. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json new file mode 100644 index 00000000000..f6745fc31dc --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "parent_title": "Connecting-to-the-HPC-infrastructure", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal", + "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node" + }, + "previous_title": "connecting_paragraph_3", + "next_title": "connecting_paragraph_5", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt new file mode 100644 index 00000000000..94d5d9500a3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt @@ -0,0 +1,12 @@ +First Time connection to the HPC infrastructure +Connect +Open up a terminal and enter the following command to connect to the HPC. +$ ssh vsc40000@login.hpc.ugent.be +Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login +node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command. 
+The first time you make a connection to the login node, you will be +asked to verify the authenticity of the login node. Please check +Warning message when first connecting to new host on how to do this. +A possible error message you can get if you previously saved your +private key somewhere else than the default location +($HOME/.ssh/id_rsa): diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json new file mode 100644 index 00000000000..05996eb5df2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "connecting", + "subtitle": "Connect", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host" + }, + "previous_title": "connecting_paragraph_4", + "next_title": "connecting_linux_paragraph_5.2", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#connect" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt new file mode 100644 index 00000000000..312fe885cb0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt @@ -0,0 +1,4 @@ +Permission denied (publickey,gssapi-keyex,gssapi-with-mic). +In this case, use the -i option for the ssh command to specify the +location of your private key. 
For example: +$ ssh -i /home/example/my_keys vsc40000@login.hpc.ugent.be diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json new file mode 100644 index 00000000000..85a826e41a3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Connect", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "previous_title": "connecting_linux_paragraph_5.1", + "next_title": "connecting_paragraph_6", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#connect" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt new file mode 100644 index 00000000000..3a46897bdee --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt @@ -0,0 +1,11 @@ +Adding multiple SSH public keys (optional) +1. Create a new public/private SSH key pair from the new computer. + Repeat the process described in + section Generate a public/private key pair with OpenSSH. +2. Go to +3. Upload the new SSH public key using the Add public key section. Make sure that your + public key is actually saved, because a public key will be refused + if it is too short, wrong type, or in a wrong format. +4. (optional) If you lost your key, you can delete the old key on the + same page. You should keep at least one valid public SSH key in your + account. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json new file mode 100644 index 00000000000..dd8b3400419 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "Adding-multiple-SSH-public-keys-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Applying-for-the-account", + "links": { + "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair-with-openssh" + }, + "previous_title": "account_paragraph_10", + "next_title": "account_paragraph_12", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#adding-multiple-ssh-public-keys-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt new file mode 100644 index 00000000000..1395e2ee7bd --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt @@ -0,0 +1,10 @@ +How do SSH keys work +- anyone who has the key (and the optional password) can unlock the + door and log in to the account. +- the door to your VSC account is special: it can have multiple + locks (SSH public keys) attached to it, and you only need to open + one lock with the corresponding key (SSH private key) to open + the door (log in to the account). +Since all VSC clusters use Linux as their main operating system, you +will need to get acquainted with using the command-line interface and +using the terminal (see tutorial). 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json new file mode 100644 index 00000000000..33d083958b9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "How-do-SSH-keys-work", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "../../linux-tutorial" + }, + "previous_title": "account_paragraph_3", + "next_title": "account_paragraph_5", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#how-do-ssh-keys-work" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt new file mode 100644 index 00000000000..f3483fcaef1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt @@ -0,0 +1,12 @@ +How do SSH keys work +To open a Terminal window in macOS, open the Finder and choose +*\>\> Applications \> Utilities \> Terminal* +Before requesting an account, you need to generate a pair of ssh keys. +One popular way to do this on macOS is using the OpenSSH client included with macOS, which you can then also use to log on to the clusters. +Test OpenSSH +Secure Shell (ssh) is a cryptographic network protocol for secure data +communication, remote command-line login, remote command execution, and +other secure network services between two networked computers. In short, +ssh provides a secure connection between 2 computers via insecure +channels (Network, Internet, telephone lines, ...). 
+"Secure" means that: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json new file mode 100644 index 00000000000..c75d6aede58 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Test-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_paragraph_4", + "next_title": "account_macos_paragraph_5.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#test-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt new file mode 100644 index 00000000000..5189a953002 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt @@ -0,0 +1,13 @@ +1. the User is authenticated to the System; and +2. the System is authenticated to the User; and +3. all data is encrypted during transfer. +OpenSSH is a FREE implementation of the SSH connectivity protocol. macOS comes +with its own implementation of OpenSSH, so you don't need to install any +third-party software to use it. Just open a terminal window and jump in! +On all popular Linux distributions, the OpenSSH software is readily +available, and most often installed by default. 
You can check whether +the OpenSSH software is installed by opening a terminal and typing: +$ ssh -V +OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017 +To access the clusters and transfer your files, you will use the +following commands: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json new file mode 100644 index 00000000000..7f6c80a32f6 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Test-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_macos_paragraph_5.1", + "next_title": "account_macos_paragraph_5.3", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#test-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt new file mode 100644 index 00000000000..2c97d597425 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt @@ -0,0 +1,20 @@ +1. ssh-keygen: to generate the SSH key pair (public + private key); +2. ssh: to open a shell on a remote machine; +3. sftp: a secure equivalent of ftp; +4. scp: a secure equivalent of the remote copy command rcp. +Generate a public/private key pair with OpenSSH +A key pair might already be present in the default location inside your +home directory. 
Therefore, we first check if a key is available with the +"list short" ("ls") command: +$ ls ~/.ssh +If a key-pair is already available, you would normally get: +authorized_keys id_rsa id_rsa.pub known_hosts +Otherwise, the command will show: +ls: .ssh: No such file or directory +You can recognise a public/private key pair when a pair of files has the +same name except for the extension ".pub" added to one of them. In this +particular case, the private key is "id_rsa" and public key is +"id_rsa.pub". You may have multiple keys (not necessarily in the +directory "~/.ssh") if you or your operating system requires this. Be +aware that your existing key pair might be too short, or not the right +type. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json new file mode 100644 index 00000000000..7c0f0d2a04d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_macos_paragraph_5.2", + "next_title": "account_macos_paragraph_5.4", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt new file mode 100644 index 00000000000..3cde4395d81 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt @@ 
-0,0 +1,18 @@ +You will need to generate a new key pair, when: +1. you don't have a key pair yet +2. you forgot the passphrase protecting your private key +3. your private key was compromised +4. your key pair is too short or not the right type +For extra security, the private key itself can be encrypted using a +"passphrase", to prevent anyone from using your private key even when +they manage to copy it. You have to "unlock" the private key by typing +the passphrase. Be sure to never give away your private key, it is +private and should stay private. You should not even copy it to one of +your other machines, instead, you should create a new public/private key +pair for each machine. +$ ssh-keygen -t rsa -b 4096 +Generating public/private rsa key pair. Enter file in which to save the +key (/home/user/.ssh/id_rsa): Enter passphrase (empty for no +passphrase): Enter same passphrase again: Your identification has been +saved in /home/user/.ssh/id_rsa. Your public key has been saved in +/home/user/.ssh/id_rsa.pub. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json new file mode 100644 index 00000000000..346108200ac --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_macos_paragraph_5.3", + "next_title": "account_macos_paragraph_5.5", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt new file mode 100644 index 00000000000..78c142e82e0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt @@ -0,0 +1,6 @@ +This will ask you for a file name to store the private and public key, +and a passphrase to protect your private key. It needs to be emphasised +that you really should choose the passphrase wisely! The system will ask +you for it every time you want to use the private key that is every time +you want to access the cluster or transfer your files. +Without your key pair, you won't be able to apply for a personal VSC account. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json new file mode 100644 index 00000000000..25baa1e073f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_macos_paragraph_5.4", + "next_title": "account_paragraph_6", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt new file mode 100644 index 00000000000..c3b395b5296 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt @@ -0,0 +1 @@ +Using an SSH agent (optional) \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json new file mode 100644 index 00000000000..b8931a423d3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": 
"account_paragraph_5", + "next_title": "account_paragraph_7", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt new file mode 100644 index 00000000000..d204f4e4392 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt @@ -0,0 +1,14 @@ +Using an SSH agent (optional) +Most recent Unix derivatives include by default an SSH agent +to keep and manage the user SSH keys. If you use one of these derivatives you must include the new keys into +the SSH manager keyring to be able to connect to the HPC cluster. If +not, SSH client will display an error message (see Connecting) similar to this: +Agent admitted failure to sign using the key. +Permission denied (publickey,gssapi-keyex,gssapi-with-mic). +This could be fixed using the ssh-add command. You can include the new +private keys' identities in your keyring with: +$ ssh-add + tip + Without extra options ssh-add adds any key located at $HOME/.ssh + directory, but you can specify the private key location path as + argument, as example: ssh-add /path/to/my/id_rsa. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json new file mode 100644 index 00000000000..c43391b146e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "../connecting" + }, + "previous_title": "account_paragraph_6", + "next_title": "account_macos_paragraph_7.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt new file mode 100644 index 00000000000..8fd93f6b4f6 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt @@ -0,0 +1,7 @@ +Check that your key is available from the keyring with: +$ ssh-add -l +After these changes the key agent will keep your SSH key to connect to +the clusters as usual. + tip + You should execute ssh-add command again if you generate a new SSH + key. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json new file mode 100644 index 00000000000..519b58bb151 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_macos_paragraph_7.1", + "next_title": "account_paragraph_8", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt new file mode 100644 index 00000000000..d11380c2519 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt @@ -0,0 +1,11 @@ +Applying for the account +This file has been stored in the directory "~/.ssh/". + tip + As ".ssh" is an invisible directory, the Finder will not show it by + default. The easiest way to access the folder, is by pressing ++cmd+shift+g++ (or ++cmd+shift+"."++), + which will allow you to enter the name of a directory, which you would + like to open in Finder. Here, type "~/.ssh" and press enter. +After you have uploaded your public key you will receive an e-mail with +a link to confirm your e-mail address. After confirming your e-mail +address the VSC staff will review and if applicable approve your +account. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json new file mode 100644 index 00000000000..6b6e8c72703 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Applying-for-the-account", + "title_depth": 2, + "directory": "account", + "parent_title": "account", + "previous_title": "account_paragraph_8", + "next_title": "account_paragraph_10", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#applying-for-the-account" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt new file mode 100644 index 00000000000..3e588c709d4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt @@ -0,0 +1,35 @@ +First Time connection to the HPC infrastructure + A locale is a set of parameters that defines the user's language, country and + any special variant preferences that the user wants to see in their user + interface. Usually a locale identifier consists of at least a language + identifier and a region identifier. + Open the .bashrc on your local machine with your favourite editor and + add the following lines: + + $ nano ~/.bashrc + ... + export LANGUAGE="en_US.UTF-8" + export LC_ALL="en_US.UTF-8" + export LC_CTYPE="en_US.UTF-8" + export LANG="en_US.UTF-8" + ... + + + tip "tip: vi" + To start entering text in vi: move to the place you want to start + entering text with the arrow keys and type "i" to switch to insert mode. 
You can easily exit vi by entering: ""ESC" :wq" + To exit vi without saving your changes, enter ""ESC":q!" + + + or alternatively (if you are not comfortable with the Linux editors), + again on your local machine: + + $ echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile + $ echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile + $ echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile + $ echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile + + + You can now log out, open a new terminal/shell on your local machine and + reconnect to the login node, and you should not get these warnings anymore. + \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json new file mode 100644 index 00000000000..4c6e5477119 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "parent_title": "Connecting-to-the-HPC-infrastructure", + "previous_title": "connecting_paragraph_9", + "next_title": "connecting_paragraph_11", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt new file mode 100644 index 00000000000..d872c89a0f8 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt @@ -0,0 +1,6 @@ +Transfer Files to/from the 
HPC +Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. +The preferred way to transfer files is by using an scp or sftp via the +secure OpenSSH protocol. macOS ships with an implementation of OpenSSH, so you +don't need to install any third-party software to use it. Just open a +terminal window and jump in! diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json new file mode 100644 index 00000000000..1425455ade8 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Transfer-Files-tofrom-the-HPC", + "title_depth": 2, + "directory": "connecting", + "parent_title": "Connecting-to-the-HPC-infrastructure", + "previous_title": "connecting_paragraph_10", + "next_title": "connecting_paragraph_12", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#transfer-files-tofrom-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt new file mode 100644 index 00000000000..8d0031fcca9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt @@ -0,0 +1,12 @@ +Transfer Files tofrom the HPC +Using scp +Secure copy or SCP is a tool (command) for securely transferring files between a local +host (= your computer) and a remote host (the HPC). It is based on the +Secure Shell (SSH) protocol.
The scp command is the equivalent of the cp (i.e., +copy) command, but can copy files to or from remote machines. +It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if +you have symlinks to them in your home directory. See +the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux + for how to do this. +Open an additional terminal window and check that you're working on your +local machine. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json new file mode 100644 index 00000000000..332e6ed2996 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" + }, + "previous_title": "connecting_paragraph_11", + "next_title": "connecting_macos_paragraph_12.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt new file mode 100644 index 00000000000..f1da0677a67 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt @@ -0,0 +1,17 @@ +$ hostname + +If you're still using the terminal that is connected to the HPC, close the +connection by typing "exit" 
in the terminal window. +For example, we will copy the (local) file "localfile.txt" to your +home directory on the HPC cluster. We first generate a small dummy +"localfile.txt", which contains the word "Hello". Use your own VSC +account, which is something like "vsc40000". Don't forget the colon (:) at the +end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your +local filesystem. You can even specify where to save the file on the +remote filesystem by putting a path after the colon. +$ echo "Hello" > localfile.txt +$ ls -l +... +-rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt +$ scp localfile.txt vsc40000@login.hpc.ugent.be: +localfile.txt 100% 6 0.0KB/s 00:00 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json new file mode 100644 index 00000000000..d86cdd989ac --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_macos_paragraph_12.1", + "next_title": "connecting_macos_paragraph_12.3", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt new file mode 100644 index 00000000000..9585900e356 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt @@ -0,0 +1,22 @@ 
+Connect to the HPC via another terminal, print the working directory (to +make sure you're in the home directory) and check whether the file has +arrived: +$ pwd +/user/home/gent/vsc400/vsc40000 +$ ls -l +total 1536 +drwxrwxr-x 2 +drwxrwxr-x 2 +drwxrwxr-x 10 +-rw-r--r-- 1 +$ cat localfile.txt +Hello +The scp command can also be used to copy files from the cluster to your +local machine. Let us copy the remote file "intro-HPC--Gent.pdf" from your "docs" +subdirectory on the cluster to your local computer. +First, we will confirm that the file is indeed in the "docs" +subdirectory. In the terminal on the login node, enter: +$ cd ~/docs +$ ls -l +total 1536 +-rw-r--r-- 1 vsc40000 Sep 11 09:53 intro-HPC--Gent.pdf diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json new file mode 100644 index 00000000000..4fcc42d2337 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_macos_paragraph_12.2", + "next_title": "connecting_macos_paragraph_12.4", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt new file mode 100644 index 00000000000..2664953ed0c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt @@ -0,0 +1,14 
@@ +Now we will copy the file to the local machine. On the terminal on your +own local computer, enter: +$ scp vsc40000@login.hpc.ugent.be:./docs/intro-HPC--Gent.pdf . +intro-HPC--Gent.pdf 100% 725KB 724.6KB/s 00:01 +$ ls -l +total 899 +-rw-r--r-- 1 user staff 741995 Sep 18 09:53 +-rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt +The file has been copied from the HPC to your local computer. +It's also possible to copy entire directories (and their contents) with +the -r flag. For example, if we want to copy the local directory +dataset to $VSC_SCRATCH, we can use the following command (assuming +you've created the scratch symlink): +$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json new file mode 100644 index 00000000000..757b533cf8d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_macos_paragraph_12.3", + "next_title": "connecting_macos_paragraph_12.5", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt new file mode 100644 index 00000000000..51d39b548c3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt @@ -0,0 +1,14 @@ +If you don't use 
the -r option to copy a directory, you will run into +the following error: +$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch +dataset: not a regular file +Using sftp +The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file +transfer and file management functionalities over any reliable data +stream. It was designed as an extension of the Secure Shell protocol +(SSH) version 2.0. This protocol assumes that it is run over a secure +channel, such as SSH, that the server has already authenticated the +client, and that the identity of the client user is available to the +protocol. +The sftp is an equivalent of the ftp command, with the difference that +it uses the secure ssh protocol to connect to the clusters. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json new file mode 100644 index 00000000000..d18c7c7deb5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-sftp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_macos_paragraph_12.4", + "next_title": "connecting_macos_paragraph_12.6", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt new file mode 100644 index 00000000000..4ae257101f1 --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt @@ -0,0 +1,18 @@ +One easy way of starting a sftp session is +$ sftp vsc40000@login.hpc.ugent.be +Typical and popular commands inside an sftp session are: +| | | +|:--------------------------|:-------------------------------------------------------------------------------------| +| cd ~/examples/fibo | Move to the examples/fibo subdirectory on the HPC (i.e., the HPC remote machine) | +| ls | Get a list of the files in the current directory on the HPC. | +| get fibo.py | Copy the file "fibo.py" from the HPC | +| get tutorial/HPC.pdf | Copy the file "HPC.pdf" from the HPC, which is in the "tutorial" subdirectory. | +| lcd test | Move to the "test" subdirectory on your local machine. | +| lcd .. | Move up one level in the local directory. | +| lls | Get local directory listing. | +| put test.py | Copy the local file test.py to the HPC. | +| put test1.py test2.py | Copy the local file test1.py to the HPC and rename it to test2.py. | +| bye | Quit the sftp session | +| **mget *.cc** | Copy all the remote files with extension ".cc" to the local directory. | +| **mput *.h** | Copy all the local files with extension ".h" to the HPC. 
| +| | | diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json new file mode 100644 index 00000000000..a8a4f2a3bab --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "Using-sftp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "", + "1": "" + }, + "previous_title": "connecting_macos_paragraph_12.5", + "next_title": "connecting_macos_paragraph_12.7", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt new file mode 100644 index 00000000000..c5ed84e9ea0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt @@ -0,0 +1,12 @@ +Transfer Files tofrom the HPC +Using a GUI (Cyberduck) +Cyberduck is a graphical alternative to the scp command. It can be +installed from . +This is the one-time setup you will need to do before connecting: +1. After starting Cyberduck, the Bookmark tab will show up. To add a + new bookmark, click on the "+" sign on the bottom left of the + window. A new window will open. +2. In the drop-down menu on top, select "SFTP (SSH File Transfer Protocol)". +3. In the "Server" field, type in login.hpc.ugent.be. In the "Username" field, type in + your VSC account id (this looks like vsc40000). +4. 
Select the location of your SSH private key in the "SSH Private Key" field. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json new file mode 100644 index 00000000000..bd02ed8502f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-a-GUI-(Cyberduck)", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_paragraph_12", + "next_title": "connecting_macos_paragraph_13.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt new file mode 100644 index 00000000000..d48d0ce00a3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt @@ -0,0 +1,6 @@ +5. Finally, type in a name for the bookmark in the "Nickname" field and + close the window by pressing on the red circle in the top left + corner of the window. +To open the connection, click on the "Bookmarks" icon (which +resembles an open book) and double-click on the bookmark you just +created. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json new file mode 100644 index 00000000000..344ff690d54 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-a-GUI-(Cyberduck)", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_macos_paragraph_13.1", + "next_title": "connecting_paragraph_14", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt new file mode 100644 index 00000000000..773d03f0689 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt @@ -0,0 +1,7 @@ +First Time connection to the HPC infrastructure +ssh_exchange_identification: read: Connection reset by peer +First Time connection to the HPC infrastructure +The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. +If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. +If you have any issues connecting to the HPC after you've followed these +steps, see Issues connecting to login node to troubleshoot. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json new file mode 100644 index 00000000000..89431f52435 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "parent_title": "Connecting-to-the-HPC-infrastructure", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal", + "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node" + }, + "previous_title": "connecting_paragraph_3", + "next_title": "connecting_paragraph_5", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt new file mode 100644 index 00000000000..d4c89b7e1c7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt @@ -0,0 +1,10 @@ +First Time connection to the HPC infrastructure +Connect +Open up a terminal and enter the following command to connect to the HPC. +You can open a terminal by navigating to Applications and then Utilities in the Finder and opening Terminal.app, or by entering Terminal in Spotlight Search.
+$ ssh vsc40000@login.hpc.ugent.be +Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login +node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command. +The first time you make a connection to the login node, you will be +asked to verify the authenticity of the login node. Please check +Warning message when first connecting to new host on how to do this. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json new file mode 100644 index 00000000000..e17629a55f3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "connecting", + "subtitle": "Connect", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host" + }, + "previous_title": "connecting_paragraph_4", + "next_title": "connecting_macos_paragraph_5.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#connect" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt new file mode 100644 index 00000000000..6fa418464dd --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt @@ -0,0 +1,7 @@ +A possible error message you can get if you previously saved your +private key somewhere else than the default location +($HOME/.ssh/id_rsa): +Permission 
denied (publickey,gssapi-keyex,gssapi-with-mic). +In this case, use the -i option for the ssh command to specify the +location of your private key. For example: +$ ssh -i /home/example/my_keys diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json new file mode 100644 index 00000000000..5c1d808739c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Connect", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "previous_title": "connecting_macos_paragraph_5.1", + "next_title": "connecting_paragraph_6", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#connect" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt new file mode 100644 index 00000000000..062ea570d96 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt @@ -0,0 +1,11 @@ +Adding multiple SSH public keys (optional) +1. Create a new public/private SSH key pair from Putty. Repeat the + process described in + section Generate a public/private key pair. +2. Go to +3. Upload the new SSH public key using the Add public key section. Make sure that your + public key is actually saved, because a public key will be refused + if it is too short, wrong type, or in a wrong format. +4. (optional) If you lost your key, you can delete the old key on the + same page.
You should keep at least one valid public SSH key in your + account. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json new file mode 100644 index 00000000000..eb4dd3b3a57 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "Adding-multiple-SSH-public-keys-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Applying-for-the-account", + "links": { + "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair" + }, + "previous_title": "account_paragraph_10", + "next_title": "account_paragraph_12", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#adding-multiple-ssh-public-keys-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt new file mode 100644 index 00000000000..2ff8ffc1a08 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt @@ -0,0 +1,14 @@ +How do SSH keys work +- anyone who has the key (and the optional password) can unlock the + door and log in to the account. +- the door to your VSC account is special: it can have multiple + locks (SSH public keys) attached to it, and you only need to open + one lock with the corresponding key (SSH private key) to open + the door (log in to the account). 
+Since all VSC clusters use Linux as their main operating system, you +will need to get acquainted with using the command-line interface and +using the terminal (see tutorial). +A typical Windows environment does not come with pre-installed software +to connect and run command-line executables on a HPC. Some tools need to be +installed on your Windows machine first, before we can start the actual +work. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json new file mode 100644 index 00000000000..08573d26bfe --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "How-do-SSH-keys-work", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "../../linux-tutorial" + }, + "previous_title": "account_paragraph_3", + "next_title": "account_windows_paragraph_4.2", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#how-do-ssh-keys-work" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt new file mode 100644 index 00000000000..c89b45d8f2b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt @@ -0,0 +1,13 @@ +Get PuTTY: A free telnet/SSH client +We recommend to use the PuTTY tools package, which is freely available. +You do not need to install PuTTY, you can download the PuTTY and +PuTTYgen executable and run it. 
This can be useful in situations where +you do not have the required permissions to install software on the +computer you are using. Alternatively, an installation package is also +available. +You can download PuTTY from the official address: +. You +probably want the 64-bits version. If you can install software on your +computer, you can use the "Package files", if not, you can download and +use putty.exe and puttygen.exe in the "Alternative binary files" +section. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json new file mode 100644 index 00000000000..bedb3d33218 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Get-PuTTY-A-free-telnetSSH-client", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_windows_paragraph_4.1", + "next_title": "account_windows_paragraph_4.3", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#get-putty-a-free-telnetssh-client" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt new file mode 100644 index 00000000000..cebd1da3baf --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt @@ -0,0 +1,13 @@ +The PuTTY package consists of several components, but we'll only use +two: +1. PuTTY: the Telnet and SSH client itself (to login, see Open a terminal) +2. 
PuTTYgen: an RSA and DSA key generation utility (to generate a key pair, + see Generate a public/private key pair) +Generating a public/private key pair +Before requesting a VSC account, you need to generate a pair of ssh +keys. You need 2 keys, a public and a private key. You can visualise the +public key as a lock to which only you have the key (your private key). +You can send a copy of your lock to anyone without any problems, because +only you can open it, as long as you keep your private key secure. To +generate a public/private key pair, you can use the PuTTYgen key +generator. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json new file mode 100644 index 00000000000..a8fcacd08a0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "account", + "subtitle": "Generating-a-publicprivate-key-pair", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "https://docs.hpc.ugent.be/account/../connecting/#open-a-terminal", + "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair" + }, + "previous_title": "account_windows_paragraph_4.2", + "next_title": "account_windows_paragraph_4.4", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt new file mode 100644 index 00000000000..b7743b0b9ae --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt @@ -0,0 +1,17 @@ +Start PuTTYgen.exe it and follow these steps: +1. In "Parameters" (at the bottom of the window), choose "RSA" and set the number of + bits in the key to 4096. +2. Click on "Generate". To generate the key, you must move the mouse cursor over + the PuTTYgen window (this generates some random data that PuTTYgen + uses to generate the key pair). Once the key pair is generated, your + public key is shown in the field "Public key for pasting into OpenSSH authorized_keys file". +3. Next, it is advised to fill in the "Key comment" field to make it easier + identifiable afterwards. +4. Next, you should specify a passphrase in the "Key passphrase" field and retype it in + the "Confirm passphrase" field. Remember, the passphrase protects the private key against + unauthorised use, so it is best to choose one that is not too easy + to guess but that you can still remember. Using a passphrase is not + required, but we recommend you to use a good passphrase unless you + are certain that your computer's hard disk is encrypted with a + decent password. (If you are not sure your disk is encrypted, it + probably isn't.) 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json new file mode 100644 index 00000000000..5fe2e81aa3d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Generating-a-publicprivate-key-pair", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_windows_paragraph_4.3", + "next_title": "account_windows_paragraph_4.5", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt new file mode 100644 index 00000000000..2326d87b6d7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt @@ -0,0 +1,7 @@ +5. Save both the public and private keys in a folder on your personal + computer (We recommend to create and put them in the folder + "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh") with the + buttons "Save public key" and "Save private key". We recommend using the name "id_rsa.pub" for the public key, and + "id_rsa.ppk" for the private key. +If you use another program to generate a key pair, please remember that +they need to be in the OpenSSH format to access the HPC clusters. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json new file mode 100644 index 00000000000..79c584a8f41 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Generating-a-publicprivate-key-pair", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_windows_paragraph_4.4", + "next_title": "account_paragraph_5", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt new file mode 100644 index 00000000000..b8dba743c0a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt @@ -0,0 +1,13 @@ +Using an SSH agent (optional) +It is possible to setup a SSH agent in Windows. This is an optional +configuration to help you to keep all your SSH keys (if you have +several) stored in the same key ring to avoid to type the SSH key +password each time. The SSH agent is also necessary to enable SSH hops +with key forwarding from Windows. +Pageant is the SSH authentication agent used in windows. This agent should be +available from the PuTTY installation package + or as +stand alone binary package. +After the installation just start the Pageant application in Windows, +this will start the agent in background. The agent icon will be visible +from the Windows panel. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json new file mode 100644 index 00000000000..69771b48c86 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_paragraph_5", + "next_title": "account_windows_paragraph_6.2", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt new file mode 100644 index 00000000000..62ac04dd9aa --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt @@ -0,0 +1,11 @@ +At this point the agent does not contain any private key. You should +include the private key(s) generated in the previous section Generating a public/private key pair. +1. Click on "Add key" +2. Select the private key file generated in Generating a public/private key pair ("id_rsa.ppk" by default). +3. Enter the same SSH key password used to generate the key. After this + step the new key will be included in Pageant to manage the SSH + connections. +4. You can see the SSH key(s) available in the key ring just clicking + on "View Keys". +5. You can change PuTTY setup to use the SSH agent. Open PuTTY and check + Connection > SSH > Auth > Allow agent forwarding. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json new file mode 100644 index 00000000000..24670708070 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair", + "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair" + }, + "previous_title": "account_windows_paragraph_6.1", + "next_title": "account_windows_paragraph_6.3", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt new file mode 100644 index 00000000000..17c94975dec --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt @@ -0,0 +1,5 @@ +Now you can connect to the login nodes as usual. The SSH agent will know +which SSH key should be used and you do not have to type the SSH +passwords each time, this task is done by Pageant agent automatically. +It is also possible to use WinSCP with Pageant, see + for more details. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json new file mode 100644 index 00000000000..d47ad3bd215 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_windows_paragraph_6.2", + "next_title": "account_paragraph_7", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt new file mode 100644 index 00000000000..90c17263cf5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt @@ -0,0 +1,7 @@ +Applying for the account +This file should have been stored in the directory +"C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh" +After you have uploaded your public key you will receive an e-mail with +a link to confirm your e-mail address. After confirming your e-mail +address the VSC staff will review and if applicable approve your +account. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json new file mode 100644 index 00000000000..d01ac9c3c16 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Applying-for-the-account", + "title_depth": 2, + "directory": "account", + "parent_title": "account", + "previous_title": "account_paragraph_8", + "next_title": "account_paragraph_10", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#applying-for-the-account" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt new file mode 100644 index 00000000000..aaf5a585ebd --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt @@ -0,0 +1,5 @@ +First Time connection to the HPC infrastructure + A locale is a set of parameters that defines the user's language, country and + any special variant preferences that the user wants to see in their user + interface. Usually a locale identifier consists of at least a language + identifier and a region identifier. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json new file mode 100644 index 00000000000..45c2bd2d90e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "parent_title": "Connecting-to-the-HPC-infrastructure", + "previous_title": "connecting_paragraph_9", + "next_title": "connecting_paragraph_11", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt new file mode 100644 index 00000000000..b43909c15c4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt @@ -0,0 +1,11 @@ +Transfer Files to/from the HPC +Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. +WinSCP +To transfer files to and from the cluster, we recommend the use of +WinSCP, a graphical file management tool which can transfer files using +secure protocols such as SFTP and SCP. WinSCP is freely available from +. +To transfer your files using WinSCP, +1. Open the program +2. 
The "Login" menu is shown automatically (if it is closed, click "New Session" to open it again). Fill in the necessary fields under "Session" + 1. Click "New Site". diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json new file mode 100644 index 00000000000..d9fbc64790a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "WinSCP", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "previous_title": "connecting_paragraph_10", + "next_title": "connecting_windows_paragraph_11.2", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt new file mode 100644 index 00000000000..642bb4e34b6 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt @@ -0,0 +1,19 @@ + 2. Enter "login.hpc.ugent.be" in the "Host name" field. + 3. Enter your "vsc-account" in the "User name" field. + 4. Select "SCP" as the "file" protocol. + 5. Note that the password field remains empty. + 6. Click "Advanced...". + 7. Click "SSH > Authentication". + 8. Select your private key in the field "Private key file". +3. Press the "Save" button, to save the session under "Session > Sites" for future access. +4. Finally, when clicking on "Login", you will be asked for your key passphrase. 
+The first time you make a connection to the login node, a Security +Alert will appear and you will be asked to verify the authenticity of the +login node. +Make sure the fingerprint in the alert matches one of the following: +- ssh-rsa 2048 10:2f:31:21:04:75:cb:ed:67:e0:d5:0c:a1:5a:f4:78 +- ssh-rsa 2048 SHA256:W8Wz0/FkkCR2ulN7+w8tNI9M0viRgFr2YlHrhKD2Dd0 +- ssh-ed25519 255 19:28:76:94:52:9d:ff:7d:fb:8b:27:b6:d7:69:42:eb +- ssh-ed25519 256 SHA256:8AJg3lPN27y6i+um7rFx3xoy42U8ZgqNe4LsEycHILA +- ssh-ecdsa 256 e6:d2:9c:d8:e7:59:45:03:4a:1f:dc:96:62:29:9c:5f +- ssh-ecdsa 256 SHA256:C8TVx0w8UjGgCQfCmEUaOPxJGNMqv2PXLyBNODe5eOQ diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json new file mode 100644 index 00000000000..65055dc0764 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "WinSCP", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "previous_title": "connecting_windows_paragraph_11.1", + "next_title": "connecting_windows_paragraph_11.3", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt new file mode 100644 index 00000000000..b52c614f263 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt @@ -0,0 +1,7 @@ +If it does, press Yes, if it doesn't, please 
contact hpc@ugent.be. +Note: it is possible that the ssh-ed25519 fingerprint starts with ssh-ed25519 255 +rather than ssh-ed25519 256 (or vice versa), depending on the PuTTY version you are using. +It is safe to ignore this 255 versus 256 difference, but the part after should be +identical. +Now, try out whether you can transfer an arbitrary file from your local +machine to the HPC and back. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json new file mode 100644 index 00000000000..dd628f8e8cd --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "WinSCP", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "previous_title": "connecting_windows_paragraph_11.2", + "next_title": "connecting_paragraph_12", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt new file mode 100644 index 00000000000..14f191fe61a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt @@ -0,0 +1,11 @@ +First Time connection to the HPC infrastructure +ssh_exchange_identification: read: Connection reset by peer +First Time connection to the HPC infrastructure +The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. 
+If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. +If you have any issues connecting to the HPC after you've followed these +steps, see Issues connecting to login node to troubleshoot. +Open a Terminal +You've generated a public/private key pair with PuTTYgen and have an +approved account on the VSC clusters. The next step is to setup the +connection to (one of) the HPC. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json new file mode 100644 index 00000000000..24d4df9e248 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "Open-a-Terminal", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Connection-restrictions", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal", + "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node" + }, + "previous_title": "connecting_paragraph_3", + "next_title": "connecting_windows_paragraph_4.2", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt new file mode 100644 index 00000000000..e481b47bc2b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt @@ -0,0 +1,13 @@ +In the screenshots, we show the setup for user +to the HPC cluster via the login node 
"login.hpc.ugent.be". +1. Start the PuTTY executable putty.exe in your directory + C:\Program Files (x86)\PuTTY and the configuration screen will pop + up. As you will often use the PuTTY tool, we recommend adding a + shortcut on your desktop. +2. Within the category , in the field , enter the name of the + login node of the cluster (i.e., "login.hpc.ugent.be") you want to connect to. + +3. In the category "Connection > Data", in the field "Auto-login username", put in , which is your VSC + username that you have received by e-mail after your request was + approved. + diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json new file mode 100644 index 00000000000..a783f797fdb --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Open-a-Terminal", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Connection-restrictions", + "previous_title": "connecting_windows_paragraph_4.1", + "next_title": "connecting_windows_paragraph_4.3", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt new file mode 100644 index 00000000000..fbd5e76f278 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt @@ -0,0 +1,13 @@ +4. 
In the category "Connection > SSH > Auth", in the field "Private key file for authentication" click on "Browse" and select the private key + (i.e., "id_rsa.ppk") that you generated and saved above. +5. In the category "Connection > SSH > X11", click the "Enable X11 Forwarding" checkbox. +6. Now go back to , and fill in "hpcugent" in the "Saved Sessions" field and press "Save" to + store the session information. + +7. Now pressing "Open", will open a terminal window and asks for you + passphrase. + +8. If this is your first time connecting, you will be asked to verify + the authenticity of the login node. Please see + section Warning message when first connecting to new host + on how to do this. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json new file mode 100644 index 00000000000..9da459060af --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "connecting", + "subtitle": "Open-a-Terminal", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Connection-restrictions", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host" + }, + "previous_title": "connecting_windows_paragraph_4.2", + "next_title": "connecting_windows_paragraph_4.4", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt new file mode 100644 index
00000000000..f4a1302750b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt @@ -0,0 +1,11 @@ +9. After entering your correct passphrase, you will be connected to the + login-node of the HPC. +10. To check you can now "Print the Working Directory" (pwd) and check + the name of the computer, where you have logged in (hostname): + $ pwd + /user/home/gent/vsc400/vsc40000 + $ hostname -f + gligar07.gastly.os + +11. For future PuTTY sessions, just select your saved session (i.e. "hpcugent") + from the list, "Load" it and press "Open". diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json new file mode 100644 index 00000000000..83127a292f8 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Open-a-Terminal", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Connection-restrictions", + "previous_title": "connecting_windows_paragraph_4.3", + "next_title": "connecting_paragraph_5", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" +} \ No newline at end of file From 692e77b51b5756859398d992293aa49d6cb4d527 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 15:06:45 +0200 Subject: [PATCH 124/152] fix for issue with html links --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 1530eedf31c..9e5baba82f3 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ 
b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -176,7 +176,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): # add references for every link of format if re.search(r'a href=.*', content): link = content[8:-1] - curr_line = re.sub(f'<{content}>', "[" + str(len(linklist) + 1) + "]", curr_line) + curr_line = re.sub(f'<{content}>', LINK_MARKER + str(len(linklist) + 1) + LINK_MARKER, curr_line) linklist.append(link) # drop the syntax words From 7f493a19d3265c4634267075958a7d4caf966e8c Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 15:08:23 +0200 Subject: [PATCH 125/152] fix for issue with html links --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 9e5baba82f3..690385a95d4 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -175,8 +175,8 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): # add references for every link of format if re.search(r'a href=.*', content): - link = content[8:-1] - curr_line = re.sub(f'<{content}>', LINK_MARKER + str(len(linklist) + 1) + LINK_MARKER, curr_line) + link = content[7:] + curr_line = re.sub(f'<{content}>', LINK_MARKER + str(len(linklist)) + LINK_MARKER, curr_line) linklist.append(link) # drop the syntax words From 0e34396f77b0112171d8a30df36cd76c14fbac4d Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 15:30:41 +0200 Subject: [PATCH 126/152] fix for issue with relative links to the same document --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 690385a95d4..986ec2ef65b 100644 --- 
a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -158,7 +158,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): if "#" not in match[1]: linklist.append(match[1]) else: - linklist.append(DOCS_URL + "/" + main_title + "/" + match[1]) + linklist.append(DOCS_URL + "/" + main_title.replace(".md", "") + "/" + match[1]) else: linklist.append(DOCS_URL + "/" + match[1].replace(".md", "/").replace("index", "").rstrip("/")) From fa0004482a3326f8385502b85e5c4ed9b4bf5410 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 15:31:33 +0200 Subject: [PATCH 127/152] added test for replace_markdown_markers --- .../tests/test_replace_markdown_markers.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py new file mode 100644 index 00000000000..f4cee6dd75c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py @@ -0,0 +1,46 @@ +import pytest +from chatbot_parser import replace_markdown_markers + + +@pytest.mark.parametrize("input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist", [ + # baseline test + ("A normal line with nothing special", [], False, "", "A normal line with nothing special", []), + # image 1 + ("![image](a-nice-image.png)", [], False, "", "", []), + # image 2 + ("![](img/Look-at-this-photograph.png)", [], False, "", "", []), + # link 1 (outside docs) + ("A line with a [link](a-nice-link.com)", ["another-link.be"], False, "", + "A line with a linkĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["another-link.be", "a-nice-link.com"]), + # link 2 (another document within the docs) + ("A line with a [link to the docs](account.md#welcome-e-mail)", 
["another-link.be"], False, "", + "A line with a link to the docsĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]), + # link 3 (the same document) + ("A line with a [link to the same doc](#welcome-e-mail)", ["another-link.be"], False, "account.md", + "A line with a link to the same docĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]), + # codeblock + ("```shell", [], True, "", "", []), + # html syntax 1 (normal syntax) + ("A line with something in Bold", [], False, "", "A line with something in Bold", []), + # html syntax 2 (link) + ("A line with another link", ["other-website.com"], False, "", + "A line with another linkĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["other-website.com", "website.com"]), + # html syntax 3 (style) + ("

A line with style

", [], False, "", "A line with style", []), + # Bot comment + ("", [], False, "", "Something about the following table", []), + # non-Bot comment + ("", [], False, "", "", []), + # something else with <> + ("A line with an example where you should put ", [], False, "", "A line with an example where you should put ", []), + # info/tips/warnings + ("!!! warning", [], False, "", " warning", []), + # collapsable admonitions + ("??? note", [], False, "", " note", []), + # Markdown syntax 1 (not in code block) + ("`Line` **with** ++a++ _lot_ *of* _++markdown++_ `syntax`", [], False, "", "Line with a lot of markdown syntax", []), + # Markdown syntax 2 (in code block) + ("`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", [], True, "", "`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", []) +]) +def test_replace_markdown_markers(input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist): + assert replace_markdown_markers(input_line, input_linklist, in_code_block, main_title) == (expected_line, expected_linklist) From b3952b2e769483bc1a6dc7c146b847f7519843a3 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 16:22:41 +0200 Subject: [PATCH 128/152] fix to small inconsistency in metadata --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 986ec2ef65b..371ee52e6cd 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -805,7 +805,7 @@ def insert_links(text, links, options): for link_number in links.keys(): if link_number in present_links: - new_links[len(new_links.keys())] = links[link_number] + new_links[str(len(new_links.keys()))] = links[link_number] return text, new_links From 73072bf2cd57e28b8dafbb2e88ef30f52c95958e Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 
27 Aug 2024 16:26:41 +0200 Subject: [PATCH 129/152] added test for insert_links --- .../tests/test_insert_links.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py b/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py new file mode 100644 index 00000000000..9109f2518ad --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py @@ -0,0 +1,31 @@ +import pytest +from chatbot_parser import insert_links + +options_include = {"INCLUDE_LINKS_IN_PLAINTEXT": True} +options_leave_out = {"INCLUDE_LINKS_IN_PLAINTEXT": False} +links_input = {"0": "https://first_link.com", "1": "https://second_link.be", "2": "https://docs.hpc.ugent.be/account#welcome-e-mail", "3": "https://final-link.org"} + + +@pytest.mark.parametrize("text_input, options_input, text_output, new_links", [ + # Text without links + # don't include links + ("Text without links\nand with two lines.", options_leave_out, "Text without links\nand with two lines.", {}), + # include links + ("Text without links\nand with two lines.", options_include, "Text without links\nand with two lines.", {}), + # Text with all links + # don't include links + ("Text with all the links\nand with multiple lines.\nĀ§linkĀ§linkĀ§0Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§2Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§3Ā§linkĀ§linkĀ§", options_leave_out, + "Text with all the links\nand with multiple lines.\n\n\n\n", links_input), + # include links + ("Text with all the links\nand with multiple lines.\nĀ§linkĀ§linkĀ§0Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§2Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§3Ā§linkĀ§linkĀ§", options_include, + "Text with all the links\nand with multiple lines.\n https://first_link.com \n https://second_link.be \n https://docs.hpc.ugent.be/account#welcome-e-mail \n https://final-link.org ", links_input), + # Text 
with some links + # don't include links + ("Text with all the links\nand with multiple lines.\nĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§3Ā§linkĀ§linkĀ§", options_leave_out, + "Text with all the links\nand with multiple lines.\n\n", {"0": "https://second_link.be", "1": "https://final-link.org"}), + # include links + ("Text with all the links\nand with multiple lines.\nĀ§linkĀ§linkĀ§0Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§2Ā§linkĀ§linkĀ§", options_include, + "Text with all the links\nand with multiple lines.\n https://first_link.com \n https://docs.hpc.ugent.be/account#welcome-e-mail ", {"0": "https://first_link.com", "1": "https://docs.hpc.ugent.be/account#welcome-e-mail"}) +]) +def test_insert_links(text_input, options_input, text_output, new_links): + assert insert_links(text_input, links_input, options_input) == (text_output, new_links) From 31613094e7e8fd60e74a0d639fcb28b08d262e65 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 28 Aug 2024 13:16:04 +0200 Subject: [PATCH 130/152] make sure paragraphs only include full lists --- .../chatbot_parser.py | 31 +++++++++++-- .../generic/account/account_paragraph_12.txt | 3 -- .../generic/account/account_paragraph_3.txt | 6 +++ .../connecting/connecting_paragraph_1.txt | 5 +++ .../connecting/connecting_paragraph_2.txt | 14 +++--- .../connecting/connecting_paragraph_3.txt | 16 +++---- .../connecting_paragraph_3_metadata.json | 8 +++- .../account/account_linux_paragraph_11.1.txt | 5 ++- .../account/account_linux_paragraph_4.1.txt | 6 --- .../account/account_linux_paragraph_5.2.txt | 1 + .../account/account_linux_paragraph_5.3.txt | 1 - .../connecting_linux_paragraph_4.1.txt | 7 --- ...nnecting_linux_paragraph_4.1_metadata.json | 15 ------- .../account/account_macos_paragraph_11.1.txt | 5 ++- .../account/account_macos_paragraph_4.1.txt | 6 --- .../connecting_macos_paragraph_13.1.txt | 3 ++ .../connecting_macos_paragraph_13.2.txt | 3 -- .../connecting_macos_paragraph_4.1.txt | 7 --- ...nnecting_macos_paragraph_4.1_metadata.json 
| 15 ------- .../account_windows_paragraph_11.1.txt | 5 ++- .../account/account_windows_paragraph_4.1.txt | 13 +++--- ...ccount_windows_paragraph_4.1_metadata.json | 4 +- .../account/account_windows_paragraph_4.2.txt | 13 +++--- ...ccount_windows_paragraph_4.2_metadata.json | 8 +++- .../account/account_windows_paragraph_4.3.txt | 28 +++++++++--- ...ccount_windows_paragraph_4.3_metadata.json | 4 -- .../account/account_windows_paragraph_4.4.txt | 19 +------- ...ccount_windows_paragraph_4.4_metadata.json | 2 +- .../account/account_windows_paragraph_4.5.txt | 7 --- ...ccount_windows_paragraph_4.5_metadata.json | 11 ----- .../connecting_windows_paragraph_11.1.txt | 9 ++++ .../connecting_windows_paragraph_11.2.txt | 10 +---- .../connecting_windows_paragraph_11.3.txt | 1 - .../connecting_windows_paragraph_4.1.txt | 43 ++++++++++++++++--- ...ecting_windows_paragraph_4.1_metadata.json | 7 ++- .../connecting_windows_paragraph_4.2.txt | 13 ------ ...ecting_windows_paragraph_4.2_metadata.json | 11 ----- .../connecting_windows_paragraph_4.3.txt | 13 ------ ...ecting_windows_paragraph_4.3_metadata.json | 14 ------ .../connecting_windows_paragraph_4.4.txt | 11 ----- ...ecting_windows_paragraph_4.4_metadata.json | 11 ----- 41 files changed, 172 insertions(+), 242 deletions(-) delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 371ee52e6cd..6ec1aa0e9b2 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -7,7 +7,7 @@ import re import shutil import yaml -from itertools import chain +from itertools import chain, tee, zip_longest from pathlib import Path from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template @@ -402,6 +402,9 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # variable to indicate that previous section was one with if-statements previous_contained_if = False + # variable to indicate that the previous line was part of a list + in_list = False + # paragraph number to add to title paragraph_number = 1 @@ -410,7 +413,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # TODO: define metadata data if split occurs on paragraphs and last_title and title_level are known 
(placeholder in place right now) if current_paragraph_number != -1: - last_title_level = 5 + last_title_level = 4 last_dir = "PLACEHOLDER" # list to keep track of most recent directories on each title level @@ -418,12 +421,32 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, with open(file, 'r') as readfile: - for line in readfile: + # Create two independent iterators from the original file iterator (needed to check for lists) + current_line, next_line = tee(readfile) + + # Advance the next_line iterator by one step, so it is always one step ahead + next(next_line, None) + + # Process the lines + for line, nxt in zip_longest(current_line, next_line, fillvalue=""): # detect if-statements starting or ending on the current line in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len( re.findall(IF_MANGLED_PATTERNS[ENDIF], line)) + # detect whether the current line is in a list + if re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', line): # beginning of a list entry + in_list = True + elif re.search(r'^\s{2,}.+$', line) and in_list: # middle of a list entry + pass + elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list: # line(s) between list entries + pass + else: + in_list = False + + if in_list: + print(line[:-1]) + # only split up if current line is in a fully non-os-specific section if in_if_statement == 0: @@ -434,7 +457,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, in_code_block = not in_code_block # check whether a new paragraph should be started - if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= options[MIN_PARAGRAPH_LENGTH] and not in_code_block: + if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= options[MIN_PARAGRAPH_LENGTH] and not in_code_block and not in_list: # create a title for the previous 
paragraph if current_paragraph_number == -1: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt index 6ee6880838e..7ecd78e5c9f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt @@ -1,6 +1,3 @@ -5. Take into account that it will take some time before the new SSH - public key is active in your account on the system; waiting for - 15-30 minutes should be sufficient. Computation Workflow on the HPC A typical Computation workflow will be: 1. Connect to the HPC diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt index 9632ef1f5af..e4946869273 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt @@ -9,3 +9,9 @@ How do SSH keys work? VSC and they put it on the door that gives access to your account. - the SSH private key is like a physical key: you don't hand it out to other people. +- anyone who has the key (and the optional password) can unlock the + door and log in to the account. +- the door to your VSC account is special: it can have multiple + locks (SSH public keys) attached to it, and you only need to open + one lock with the corresponding key (SSH private key) to open + the door (log in to the account). 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt index b144712c9df..bc5a1f80140 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt @@ -11,3 +11,8 @@ you need to do or know: 3. Optionally, if you wish to use programs with a **graphical user interface**, you will need an X-server on your client system and log in to the login nodes with X-forwarding enabled. +4. Often several versions of software packages and libraries are + installed, so you need to select the ones you need. To manage + different versions efficiently, the VSC clusters use so-called + modules, so you will need to select and load the modules that + you need. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt index 4c1d879b954..b150c8fbb28 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt @@ -1,8 +1,3 @@ -4. Often several versions of software packages and libraries are - installed, so you need to select the ones you need. To manage - different versions efficiently, the VSC clusters use so-called - modules, so you will need to select and load the modules that - you need. Connection restrictions Since March 20th 2020, restrictions are in place that limit from where you can connect to the VSC HPC infrastructure, in response to security @@ -12,3 +7,12 @@ networks, and from (most) Belgian commercial internet providers. All other IP domains are blocked by default. 
If you are connecting from an IP address that is not allowed direct access, you have the following options to get access to VSC login nodes: +- Use an VPN connection to connect to UGent the network (recommended). +- Whitelist your IP address automatically by accessing + and log in with your UGent account. + - While this web connection is active new SSH sessions can be + started. + - Active SSH sessions will remain active even when this web page + is closed. +- Contact your HPC support team (via hpc@ugent.be) and ask them to whitelist your + IP range (e.g., for industry access, automated processes). diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt index 668a1e6df57..31dd6463266 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt @@ -1,12 +1,10 @@ -- Use an VPN connection to connect to UGent the network (recommended). -- Whitelist your IP address automatically by accessing - and log in with your UGent account. - - While this web connection is active new SSH sessions can be - started. - - Active SSH sessions will remain active even when this web page - is closed. -- Contact your HPC support team (via hpc@ugent.be) and ask them to whitelist your - IP range (e.g., for industry access, automated processes). Trying to establish an SSH connection from an IP address that does not adhere to these restrictions will result in an immediate failure to connect, with an error message like: +ssh_exchange_identification: read: Connection reset by peer + +First Time connection to the HPC infrastructure +The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. 
+If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. +If you have any issues connecting to the HPC after you've followed these +steps, see Issues connecting to login node to troubleshoot. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json index 4dc75d7dcf3..471e6bfcbf2 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json @@ -1,11 +1,15 @@ { "main_title": "connecting", - "subtitle": "Connection-restrictions", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", "title_depth": 2, "directory": "connecting", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal", + "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node" + }, "parent_title": "", "previous_title": "connecting_paragraph_2", "next_title": "connecting_paragraph_4", "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#connection-restrictions" + "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt index 3a46897bdee..b2734cc9f89 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt @@ -8,4 +8,7 @@ Adding multiple SSH public keys (optional) if it is too short, wrong type, or 
in a wrong format. 4. (optional) If you lost your key, you can delete the old key on the same page. You should keep at least one valid public SSH key in your - account. \ No newline at end of file + account. +5. Take into account that it will take some time before the new SSH + public key is active in your account on the system; waiting for + 15-30 minutes should be sufficient. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt index 1395e2ee7bd..3a282a73a15 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt @@ -1,10 +1,4 @@ How do SSH keys work -- anyone who has the key (and the optional password) can unlock the - door and log in to the account. -- the door to your VSC account is special: it can have multiple - locks (SSH public keys) attached to it, and you only need to open - one lock with the corresponding key (SSH private key) to open - the door (log in to the account). Since all VSC clusters use Linux as their main operating system, you will need to get acquainted with using the command-line interface and using the terminal (see tutorial). \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt index a166dd14503..318f913fba3 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt @@ -11,3 +11,4 @@ following commands: 1. 
ssh-keygen: to generate the SSH key pair (public + private key); 2. ssh: to open a shell on a remote machine; 3. sftp: a secure equivalent of ftp; +4. scp: a secure equivalent of the remote copy command rcp. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt index 2e8fe9e3a24..9d84f459724 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt @@ -1,4 +1,3 @@ -4. scp: a secure equivalent of the remote copy command rcp. Generate a public/private key pair with OpenSSH A key pair might already be present in the default location inside your home directory. Therefore, we first check if a key is available with the diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt deleted file mode 100644 index 773d03f0689..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt +++ /dev/null @@ -1,7 +0,0 @@ -First Time connection to the HPC infrastructure -ssh_exchange_identification: read: Connection reset by peer -First Time connection to the HPC infrastructure -The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. -If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. -If you have any issues connecting to the HPC after you've followed these -steps, see Issues connecting to login node to troubleshoot. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json deleted file mode 100644 index f6745fc31dc..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "links": { - "0": "https://docs.hpc.ugent.be/web_portal", - "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node" - }, - "previous_title": "connecting_paragraph_3", - "next_title": "connecting_paragraph_5", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt index 3a46897bdee..b2734cc9f89 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt @@ -8,4 +8,7 @@ Adding multiple SSH public keys (optional) if it is too short, wrong type, or in a wrong format. 4. (optional) If you lost your key, you can delete the old key on the same page. You should keep at least one valid public SSH key in your - account. \ No newline at end of file + account. +5. 
Take into account that it will take some time before the new SSH + public key is active in your account on the system; waiting for + 15-30 minutes should be sufficient. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt index 1395e2ee7bd..3a282a73a15 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt @@ -1,10 +1,4 @@ How do SSH keys work -- anyone who has the key (and the optional password) can unlock the - door and log in to the account. -- the door to your VSC account is special: it can have multiple - locks (SSH public keys) attached to it, and you only need to open - one lock with the corresponding key (SSH private key) to open - the door (log in to the account). Since all VSC clusters use Linux as their main operating system, you will need to get acquainted with using the command-line interface and using the terminal (see tutorial). \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt index c5ed84e9ea0..20a4acb40a8 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt @@ -10,3 +10,6 @@ This is the one-time setup you will need to do before connecting: 3. In the "Server" field, type in login.hpc.ugent.be. In the "Username" field, type in your VSC account id (this looks like vsc40000). 4. 
Select the location of your SSH private key in the "SSH Private Key" field. +5. Finally, type in a name for the bookmark in the "Nickname" field and + close the window by pressing on the red circle in the top left + corner of the window. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt index d48d0ce00a3..1d20edf411f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt @@ -1,6 +1,3 @@ -5. Finally, type in a name for the bookmark in the "Nickname" field and - close the window by pressing on the red circle in the top left - corner of the window. To open the connection, click on the "Bookmarks" icon (which resembles an open book) and double-click on the bookmark you just created. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt deleted file mode 100644 index 773d03f0689..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt +++ /dev/null @@ -1,7 +0,0 @@ -First Time connection to the HPC infrastructure -ssh_exchange_identification: read: Connection reset by peer -First Time connection to the HPC infrastructure -The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. -If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. 
-If you have any issues connecting to the HPC after you've followed these -steps, see Issues connecting to login node to troubleshoot. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json deleted file mode 100644 index 89431f52435..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "links": { - "0": "https://docs.hpc.ugent.be/web_portal", - "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node" - }, - "previous_title": "connecting_paragraph_3", - "next_title": "connecting_paragraph_5", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt index 062ea570d96..0863009f290 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt @@ -8,4 +8,7 @@ Adding multiple SSH public keys (optional) if it is too short, wrong type, or in a wrong format. 4. (optional) If you lost your key, you can delete the old key on the same page. You should keep at least one valid public SSH key in your - account. 
\ No newline at end of file + account. +5. Take into account that it will take some time before the new SSH + public key is active in your account on the system; waiting for + 15-30 minutes should be sufficient. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt index 2ff8ffc1a08..1e70493305f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt @@ -1,10 +1,4 @@ How do SSH keys work -- anyone who has the key (and the optional password) can unlock the - door and log in to the account. -- the door to your VSC account is special: it can have multiple - locks (SSH public keys) attached to it, and you only need to open - one lock with the corresponding key (SSH private key) to open - the door (log in to the account). Since all VSC clusters use Linux as their main operating system, you will need to get acquainted with using the command-line interface and using the terminal (see tutorial). @@ -12,3 +6,10 @@ A typical Windows environment does not come with pre-installed software to connect and run command-line executables on a HPC. Some tools need to be installed on your Windows machine first, before we can start the actual work. +Get PuTTY: A free telnet/SSH client +We recommend to use the PuTTY tools package, which is freely available. +You do not need to install PuTTY, you can download the PuTTY and +PuTTYgen executable and run it. This can be useful in situations where +you do not have the required permissions to install software on the +computer you are using. Alternatively, an installation package is also +available. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json index 08573d26bfe..ce74735c538 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json @@ -1,6 +1,6 @@ { "main_title": "account", - "subtitle": "How-do-SSH-keys-work", + "subtitle": "Get-PuTTY-A-free-telnetSSH-client", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", @@ -10,5 +10,5 @@ "previous_title": "account_paragraph_3", "next_title": "account_windows_paragraph_4.2", "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#how-do-ssh-keys-work" + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#get-putty-a-free-telnetssh-client" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt index c89b45d8f2b..1a30a219fec 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt @@ -1,13 +1,12 @@ -Get PuTTY: A free telnet/SSH client -We recommend to use the PuTTY tools package, which is freely available. -You do not need to install PuTTY, you can download the PuTTY and -PuTTYgen executable and run it. This can be useful in situations where -you do not have the required permissions to install software on the -computer you are using. 
Alternatively, an installation package is also -available. You can download PuTTY from the official address: . You probably want the 64-bits version. If you can install software on your computer, you can use the "Package files", if not, you can download and use putty.exe and puttygen.exe in the "Alternative binary files" section. +The PuTTY package consists of several components, but we'll only use +two: +1. PuTTY: the Telnet and SSH client itself (to login, see Open a terminal) +2. PuTTYgen: an RSA and DSA key generation utility (to generate a key pair, + see Generate a public/private key pair) +Generating a public/private key pair diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json index bedb3d33218..9616b41452a 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json @@ -1,11 +1,15 @@ { "main_title": "account", - "subtitle": "Get-PuTTY-A-free-telnetSSH-client", + "subtitle": "Generating-a-publicprivate-key-pair", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "https://docs.hpc.ugent.be/account/../connecting/#open-a-terminal", + "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair" + }, "previous_title": "account_windows_paragraph_4.1", "next_title": "account_windows_paragraph_4.3", "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#get-putty-a-free-telnetssh-client" + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" } \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt index cebd1da3baf..de5d164bb7a 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt @@ -1,9 +1,3 @@ -The PuTTY package consists of several components, but we'll only use -two: -1. PuTTY: the Telnet and SSH client itself (to login, see Open a terminal) -2. PuTTYgen: an RSA and DSA key generation utility (to generate a key pair, - see Generate a public/private key pair) -Generating a public/private key pair Before requesting a VSC account, you need to generate a pair of ssh keys. You need 2 keys, a public and a private key. You can visualise the public key as a lock to which only you have the key (your private key). @@ -11,3 +5,25 @@ You can send a copy of your lock to anyone without any problems, because only you can open it, as long as you keep your private key secure. To generate a public/private key pair, you can use the PuTTYgen key generator. +Start PuTTYgen.exe it and follow these steps: +1. In "Parameters" (at the bottom of the window), choose "RSA" and set the number of + bits in the key to 4096. +2. Click on "Generate". To generate the key, you must move the mouse cursor over + the PuTTYgen window (this generates some random data that PuTTYgen + uses to generate the key pair). Once the key pair is generated, your + public key is shown in the field "Public key for pasting into OpenSSH authorized_keys file". +3. Next, it is advised to fill in the "Key comment" field to make it easier + identifiable afterwards. +4. Next, you should specify a passphrase in the "Key passphrase" field and retype it in + the "Confirm passphrase" field. 
Remember, the passphrase protects the private key against + unauthorised use, so it is best to choose one that is not too easy + to guess but that you can still remember. Using a passphrase is not + required, but we recommend you to use a good passphrase unless you + are certain that your computer's hard disk is encrypted with a + decent password. (If you are not sure your disk is encrypted, it + probably isn't.) +5. Save both the public and private keys in a folder on your personal + computer (We recommend to create and put them in the folder + "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh") with the + buttons "Save public key" and "Save private key". We recommend using the name "id_rsa.pub" for the public key, and + "id_rsa.ppk" for the private key. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json index a8fcacd08a0..06b6e998c08 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json @@ -4,10 +4,6 @@ "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/account/../connecting/#open-a-terminal", - "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair" - }, "previous_title": "account_windows_paragraph_4.2", "next_title": "account_windows_paragraph_4.4", "OS": "windows", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt index b7743b0b9ae..d0425d6738f 100644 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt @@ -1,17 +1,2 @@ -Start PuTTYgen.exe it and follow these steps: -1. In "Parameters" (at the bottom of the window), choose "RSA" and set the number of - bits in the key to 4096. -2. Click on "Generate". To generate the key, you must move the mouse cursor over - the PuTTYgen window (this generates some random data that PuTTYgen - uses to generate the key pair). Once the key pair is generated, your - public key is shown in the field "Public key for pasting into OpenSSH authorized_keys file". -3. Next, it is advised to fill in the "Key comment" field to make it easier - identifiable afterwards. -4. Next, you should specify a passphrase in the "Key passphrase" field and retype it in - the "Confirm passphrase" field. Remember, the passphrase protects the private key against - unauthorised use, so it is best to choose one that is not too easy - to guess but that you can still remember. Using a passphrase is not - required, but we recommend you to use a good passphrase unless you - are certain that your computer's hard disk is encrypted with a - decent password. (If you are not sure your disk is encrypted, it - probably isn't.) +If you use another program to generate a key pair, please remember that +they need to be in the OpenSSH format to access the HPC clusters. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json index 5fe2e81aa3d..fba810e7299 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json @@ -5,7 +5,7 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "previous_title": "account_windows_paragraph_4.3", - "next_title": "account_windows_paragraph_4.5", + "next_title": "account_paragraph_5", "OS": "windows", "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt deleted file mode 100644 index 2326d87b6d7..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt +++ /dev/null @@ -1,7 +0,0 @@ -5. Save both the public and private keys in a folder on your personal - computer (We recommend to create and put them in the folder - "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh") with the - buttons "Save public key" and "Save private key". We recommend using the name "id_rsa.pub" for the public key, and - "id_rsa.ppk" for the private key. -If you use another program to generate a key pair, please remember that -they need to be in the OpenSSH format to access the HPC clusters. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json deleted file mode 100644 index 79c584a8f41..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generating-a-publicprivate-key-pair", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_windows_paragraph_4.4", - "next_title": "account_paragraph_5", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt index b43909c15c4..a4f00ba7a5f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt @@ -9,3 +9,12 @@ To transfer your files using WinSCP, 1. Open the program 2. The "Login" menu is shown automatically (if it is closed, click "New Session" to open it again). Fill in the necessary fields under "Session" 1. Click "New Site". + 2. Enter "login.hpc.ugent.be" in the "Host name" field. + 3. Enter your "vsc-account" in the "User name" field. + 4. Select "SCP" as the "file" protocol. + 5. Note that the password field remains empty. + 6. Click "Advanced...". + 7. Click "SSH > Authentication". + 8. Select your private key in the field "Private key file". +3. 
Press the "Save" button, to save the session under "Session > Sites" for future access. +4. Finally, when clicking on "Login", you will be asked for your key passphrase. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt index 642bb4e34b6..82c71ac4129 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt @@ -1,12 +1,3 @@ - 2. Enter "login.hpc.ugent.be" in the "Host name" field. - 3. Enter your "vsc-account" in the "User name" field. - 4. Select "SCP" as the "file" protocol. - 5. Note that the password field remains empty. - 6. Click "Advanced...". - 7. Click "SSH > Authentication". - 8. Select your private key in the field "Private key file". -3. Press the "Save" button, to save the session under "Session > Sites" for future access. -4. Finally, when clicking on "Login", you will be asked for your key passphrase. The first time you make a connection to the login node, a Security Alert will appear and you will be asked to verify the authenticity of the login node. @@ -17,3 +8,4 @@ Make sure the fingerprint in the alert matches one of the following: - ssh-ed25519 256 SHA256:8AJg3lPN27y6i+um7rFx3xoy42U8ZgqNe4LsEycHILA - ssh-ecdsa 256 e6:d2:9c:d8:e7:59:45:03:4a:1f:dc:96:62:29:9c:5f - ssh-ecdsa 256 SHA256:C8TVx0w8UjGgCQfCmEUaOPxJGNMqv2PXLyBNODe5eOQ +If it does, press Yes, if it doesn't, please contact hpc@ugent.be. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt index b52c614f263..c0ffe6b4602 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt @@ -1,4 +1,3 @@ -If it does, press Yes, if it doesn't, please contact hpc@ugent.be. Note: it is possible that the ssh-ed25519 fingerprint starts with ssh-ed25519 255 rather than ssh-ed25519 256 (or vice versa), depending on the PuTTY version you are using. It is safe to ignore this 255 versus 256 difference, but the part after should be diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt index 14f191fe61a..b5ecfb93e88 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt @@ -1,11 +1,42 @@ First Time connection to the HPC infrastructure -ssh_exchange_identification: read: Connection reset by peer -First Time connection to the HPC infrastructure -The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. -If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. -If you have any issues connecting to the HPC after you've followed these -steps, see Issues connecting to login node to troubleshoot. 
Open a Terminal You've generated a public/private key pair with PuTTYgen and have an approved account on the VSC clusters. The next step is to setup the connection to (one of) the HPC. +In the screenshots, we show the setup for user +to the HPC cluster via the login node "login.hpc.ugent.be". +1. Start the PuTTY executable putty.exe in your directory + C:\Program Files (x86)\PuTTY and the configuration screen will pop + up. As you will often use the PuTTY tool, we recommend adding a + shortcut on your desktop. +2. Within the category , in the field , enter the name of the + login node of the cluster (i.e., "login.hpc.ugent.be") you want to connect to. + +3. In the category "Connection > Data", in the field "Auto-login username", put in , which is your VSC + username that you have received by e-mail after your request was + approved. + +4. In the category "Connection > SSH > Auth", in the field "Private key file for authentication" click on "Browse" and select the private key + (i.e., "id_rsa.ppk") that you generated and saved above. +5. In the category "Connection > SSH > X11", click the "Enable X11 Forwarding" checkbox. +6. Now go back to , and fill in "hpcugent" in the "Saved Sessions" field and press "Save" to + store the session information. + +7. Now pressing "Open", will open a terminal window and asks for you + passphrase. + +8. If this is your first time connecting, you will be asked to verify + the authenticity of the login node. Please see + sectionĀ Warning message when first connecting to new host + on how to do this. +9. After entering your correct passphrase, you will be connected to the + login-node of the HPC. +10. To check you can now "Print the Working Directory" (pwd) and check + the name of the computer, where you have logged in (hostname): + $ pwd + /user/home/gent/vsc400/vsc40000 + $ hostname -f + gligar07.gastly.os + +11. For future PuTTY sessions, just select your saved session (i.e. "hpcugent") + from the list, "Load" it and press "Open". 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json index 24d4df9e248..ef4de8bd8e4 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json @@ -3,13 +3,12 @@ "subtitle": "Open-a-Terminal", "title_depth": 3, "directory": "connecting", - "parent_title": "Connection-restrictions", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", "links": { - "0": "https://docs.hpc.ugent.be/web_portal", - "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node" + "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host" }, "previous_title": "connecting_paragraph_3", - "next_title": "connecting_windows_paragraph_4.2", + "next_title": "connecting_paragraph_5", "OS": "windows", "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt deleted file mode 100644 index e481b47bc2b..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt +++ /dev/null @@ -1,13 +0,0 @@ -In the screenshots, we show the setup for user -to the HPC cluster via the login node "login.hpc.ugent.be". -1. Start the PuTTY executable putty.exe in your directory - C:\Program Files (x86)\PuTTY and the configuration screen will pop - up. 
As you will often use the PuTTY tool, we recommend adding a - shortcut on your desktop. -2. Within the category , in the field , enter the name of the - login node of the cluster (i.e., "login.hpc.ugent.be") you want to connect to. - -3. In the category "Connection > Data", in the field "Auto-login username", put in , which is your VSC - username that you have received by e-mail after your request was - approved. - diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json deleted file mode 100644 index a783f797fdb..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Open-a-Terminal", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Connection-restrictions", - "previous_title": "connecting_windows_paragraph_4.1", - "next_title": "connecting_windows_paragraph_4.3", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt deleted file mode 100644 index fbd5e76f278..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt +++ /dev/null @@ -1,13 +0,0 @@ -4. In the category "Connection > SSH > Auth", in the field "Private key file for authentication" click on "Browse" and select the private key - (i.e., "id_rsa.ppk") that you generated and saved above. -5. 
In the category "Connection > SSH > X11", click the "Enable X11 Forwarding" checkbox. -6. Now go back to , and fill in "hpcugent" in the "Saved Sessions" field and press "Save" to - store the session information. - -7. Now pressing "Open", will open a terminal window and asks for you - passphrase. - -8. If this is your first time connecting, you will be asked to verify - the authenticity of the login node. Please see - sectionĀ Warning message when first connecting to new host - on how to do this. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json deleted file mode 100644 index 9da459060af..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Open-a-Terminal", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Connection-restrictions", - "links": { - "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host" - }, - "previous_title": "connecting_windows_paragraph_4.2", - "next_title": "connecting_windows_paragraph_4.4", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt deleted file mode 100644 index f4a1302750b..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt +++ /dev/null @@ -1,11 +0,0 @@ -9. 
After entering your correct passphrase, you will be connected to the - login-node of the HPC. -10. To check you can now "Print the Working Directory" (pwd) and check - the name of the computer, where you have logged in (hostname): - $ pwd - /user/home/gent/vsc400/vsc40000 - $ hostname -f - gligar07.gastly.os - -11. For future PuTTY sessions, just select your saved session (i.e. "hpcugent") - from the list, "Load" it and press "Open". diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json deleted file mode 100644 index 83127a292f8..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Open-a-Terminal", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Connection-restrictions", - "previous_title": "connecting_windows_paragraph_4.3", - "next_title": "connecting_paragraph_5", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" -} \ No newline at end of file From 3407be3ea8b45de9d43e91fda8c4730ab0ae34e2 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 28 Aug 2024 13:43:49 +0200 Subject: [PATCH 131/152] adapted to the new source files --- .../chatbot_parser.py | 11 ++----- .../generic/account/account_paragraph_10.txt | 1 - .../compiling_your_software_paragraph_1.txt | 10 ++++++ .../compiling_your_software_paragraph_10.txt | 19 +++++++++++ ...g_your_software_paragraph_10_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_11.txt | 20 ++++++++++++ ...g_your_software_paragraph_11_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_12.txt | 9 ++++++ ...g_your_software_paragraph_12_metadata.json | 11 +++++++ 
...ng_your_software_paragraph_1_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_2.txt | 13 ++++++++ ...ng_your_software_paragraph_2_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_3.txt | 13 ++++++++ ...ng_your_software_paragraph_3_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_4.txt | 15 +++++++++ ...ng_your_software_paragraph_4_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_5.txt | 16 ++++++++++ ...ng_your_software_paragraph_5_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_6.txt | 30 +++++++++++++++++ ...ng_your_software_paragraph_6_metadata.json | 14 ++++++++ .../compiling_your_software_paragraph_7.txt | 15 +++++++++ ...ng_your_software_paragraph_7_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_8.txt | 19 +++++++++++ ...ng_your_software_paragraph_8_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_9.txt | 32 +++++++++++++++++++ ...ng_your_software_paragraph_9_metadata.json | 11 +++++++ .../account/account_linux_paragraph_5.3.txt | 2 +- .../account/account_linux_paragraph_5.4.txt | 7 +--- .../account/account_linux_paragraph_7.1.txt | 2 +- .../account/account_linux_paragraph_7.2.txt | 2 +- .../connecting_linux_paragraph_10.1.txt | 10 +++--- .../connecting_linux_paragraph_12.4.txt | 2 +- .../connecting_linux_paragraph_12.5.txt | 2 +- .../connecting_linux_paragraph_12.6.txt | 2 +- .../connecting_linux_paragraph_5.1.txt | 2 +- .../connecting_linux_paragraph_5.2.txt | 2 +- .../account/account_macos_paragraph_5.3.txt | 2 +- .../account/account_macos_paragraph_5.4.txt | 7 +--- .../account/account_macos_paragraph_7.1.txt | 2 +- .../account/account_macos_paragraph_7.2.txt | 2 +- .../connecting_macos_paragraph_10.1.txt | 10 +++--- .../connecting_macos_paragraph_12.4.txt | 2 +- .../connecting_macos_paragraph_12.5.txt | 2 +- .../connecting_macos_paragraph_12.6.txt | 2 +- .../connecting_macos_paragraph_5.1.txt | 2 +- .../connecting_macos_paragraph_5.2.txt | 
2 +- .../connecting_windows_paragraph_4.1.txt | 3 +- 47 files changed, 376 insertions(+), 51 deletions(-) create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 6ec1aa0e9b2..2b23fb4e962 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -195,10 +195,6 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): elif re.fullmatch(r'!--.*?--', content): curr_line = re.sub(r'<.*?>', "", curr_line) - # special case 
(ugly fix) - elif ' files', "", curr_line) - # keep the rest else: pass @@ -224,7 +220,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): for i, content in enumerate(asterisks): curr_line = re.sub(r"(\*+)" + content[1] + r"\1", content[1], curr_line) - pluses = re.findall(r'\+\+(.+?)\+\+', curr_line) + pluses = list(set(re.findall(r'\+\+([^ ]+?)\+\+', curr_line) + re.findall(r'\+\+(".+?")\+\+', curr_line))) if pluses: for i, content in enumerate(pluses): curr_line = re.sub(r"\+\+" + content + r"\+\+", content, curr_line) @@ -437,6 +433,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # detect whether the current line is in a list if re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', line): # beginning of a list entry in_list = True + # print("List entry found") elif re.search(r'^\s{2,}.+$', line) and in_list: # middle of a list entry pass elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list: # line(s) between list entries @@ -444,9 +441,6 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, else: in_list = False - if in_list: - print(line[:-1]) - # only split up if current line is in a fully non-os-specific section if in_if_statement == 0: @@ -969,6 +963,7 @@ def main(options, verbose=True): # for loops over all files for filename in filenames.keys(): + print("Processing " + filename) ################### define/reset loop specific variables ################### # boolean indicating whether the current file is part of the linux tutorial diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt index 371dd9db52b..f486b9b1348 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt @@ 
-10,7 +10,6 @@ https://account.vscentrum.be/ For further info please visit https://www.vscentrum.be/user-portal Kind regards, -- The VSC administrators - Now, you can start using the HPC. You can always look up your VSC id later by visiting . Adding multiple SSH public keys (optional) diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt new file mode 100644 index 00000000000..db1afd43e68 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt @@ -0,0 +1,10 @@ +Compiling and testing your software on the HPC +All nodes in the HPC cluster are running the "RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty)" +Operating system, which is a specific version of Red Hat Enterprise Linux. This means that all the +software programs +(executable) that the end-user wants to run on the HPC first must be +compiled for RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty). It also means that you first have to install all the +required external software packages on the HPC. +Most commonly used compilers are already pre-installed on the HPC and can be +used straight away. Also, many popular external software packages, which +are regularly used in the scientific community, are also pre-installed. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt new file mode 100644 index 00000000000..d49ba76b01a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt @@ -0,0 +1,19 @@ +The "mpi_hello.c" program is a simple source file, written in C with MPI +library calls. +Then, check the command line options for *"mpicc" (GNU C-Compiler with +MPI extensions)*, then we compile and list the contents of the directory +again: +mpicc --help +mpicc -o mpihello mpihello.c +ls -l +A new file "hello" has been created. Note that this program has +"execute" rights. +Let's test this program on the "login" node first: +$ ./mpihello +Hello World from Node 0. +It seems to work, now run it on the HPC. +qsub mpihello.pbs +Compiling a parallel program in Intel Parallel Studio Cluster Edition +We will now compile the same program, but using the Intel Parallel +Studio Cluster Edition compilers. 
We stay in the examples directory for +this chapter: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json new file mode 100644 index 00000000000..ca0d7d80669 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition", + "title_depth": 3, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_9", + "next_title": "compiling_your_software_paragraph_11", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt new file mode 100644 index 00000000000..be02d069ac7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt @@ -0,0 +1,20 @@ +cd ~/examples/Compiling-and-testing-your-software-on-the-HPC +We will compile this C/MPI -file into an executable with the Intel +Parallel Studio Cluster Edition. First, clear the modules (purge) and +then load the latest "intel" module: +module purge +module load intel +Then, compile and list the contents of the directory again. The Intel +equivalent of mpicc is mpiicc. +mpiicc -o mpihello mpihello.c +ls -l +Note that the old "mpihello" file has been overwritten. 
Let's test this +program on the "login" node first: +$ ./mpihello +Hello World from Node 0. +It seems to work, now run it on the HPC. +qsub mpihello.pbs +Note: The AUGent only has a license for the Intel Parallel Studio Cluster +Edition for a fixed number of users. As such, it might happen that you +have to wait a few minutes before a floating license becomes available +for your use. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json new file mode 100644 index 00000000000..808331a3f9d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition", + "title_depth": 3, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_10", + "next_title": "compiling_your_software_paragraph_12", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt new file mode 100644 index 00000000000..1d37014a426 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt @@ -0,0 +1,9 @@ +Note: The Intel Parallel Studio Cluster Edition contains equivalent +compilers for all GNU compilers. 
Hereafter the overview for C, C++ and +Fortran compilers. +| | Sequential Program | | **Parallel Program (with MPI)** | | +|-------------|------------------------|-----------|---------------------------------|-----------| +| | GNU | Intel | GNU | Intel | +| C | gcc | icc | mpicc | mpiicc | +| **C++** | g++ | icpc | mpicxx | mpiicpc | +| Fortran | gfortran | ifort | mpif90 | mpiifort | \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json new file mode 100644 index 00000000000..d032428daf1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition", + "title_depth": 3, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_11", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json new file mode 100644 index 00000000000..ec4b55c9a4d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-and-testing-your-software-on-the-HPC", + 
"title_depth": 1, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": null, + "next_title": "compiling_your_software_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-testing-your-software-on-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt new file mode 100644 index 00000000000..b52639b649d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt @@ -0,0 +1,13 @@ +Check the pre-installed software on the HPC +In order to check all the available modules and their version numbers, +which are pre-installed on the HPC enter: +When your required application is not available on the HPC please contact +any HPC member. Be aware of potential "License Costs". "Open Source" +software is often preferred. +Porting your code +To port a software-program is to translate it from the operating system in +which it was developed (e.g., Windows 7) to another operating system +(e.g., Red Hat Enterprise Linux on our HPC) so that it can be used there. Porting implies some +degree of effort, but not nearly as much as redeveloping the program in +the new environment. It all depends on how "portable" you wrote your +code. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json new file mode 100644 index 00000000000..00750c81d97 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Porting-your-code", + "title_depth": 2, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_1", + "next_title": "compiling_your_software_paragraph_3", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#porting-your-code" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt new file mode 100644 index 00000000000..f994f0bc148 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt @@ -0,0 +1,13 @@ +In the simplest case the file or files may simply be copied from one +machine to the other. However, in many cases the software is installed +on a computer in a way, which depends upon its detailed hardware, +software, and setup, with device drivers for particular devices, using +installed operating system and supporting software components, and using +different directories. 
+In some cases software, usually described as "portable software" is +specifically designed to run on different computers with compatible +operating systems and processors without any machine-dependent +installation; it is sufficient to transfer specified directories and +their contents. Hardware- and software-specific information is often +stored in configuration files in specified locations (e.g., the registry +on machines running MS Windows). diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json new file mode 100644 index 00000000000..90e7d236beb --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Porting-your-code", + "title_depth": 2, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_2", + "next_title": "compiling_your_software_paragraph_4", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#porting-your-code" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt new file mode 100644 index 00000000000..f7bf4172b71 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt @@ -0,0 +1,15 @@ +Software, which is not portable in this sense, will have to be +transferred with modifications to support the environment on the +destination machine. 
+Whilst programming, it would be wise to stick to certain standards +(e.g., ISO/ANSI/POSIX). This will ease the porting of your code to other +platforms. +Porting your code to the RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty) platform is the responsibility of the end-user. +Compiling and building on the HPC +Compiling refers to the process of translating code written in some +programming language, e.g., Fortran, C, or C++, to machine code. +Building is similar, but includes gluing together the machine code +resulting from different source files into an executable (or library). +The text below guides you through some basic problems typical for small +software projects. For larger projects it is more appropriate to use +makefiles or even an advanced build system like CMake. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json new file mode 100644 index 00000000000..b7c9ef0f71b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-and-building-on-the-HPC", + "title_depth": 2, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_3", + "next_title": "compiling_your_software_paragraph_5", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt new file mode 100644 
index 00000000000..342262b9264 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt @@ -0,0 +1,16 @@ +All the HPC nodes run the same version of the Operating System, i.e. RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty). So, +it is sufficient to compile your program on any compute node. Once you +have generated an executable with your compiler, this executable should +be able to run on any other compute-node. +A typical process looks like: +1. Copy your software to the login-node of the HPC +2. Start an interactive session on a compute node; +3. Compile it; +4. Test it locally; +5. Generate your job scripts; +6. Test it on the HPC +7. Run it (in parallel); +We assume you've copied your software to the HPC. The next step is to request +your private compute node. +$ qsub -I +qsub: waiting for job 123456 to start diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json new file mode 100644 index 00000000000..02a8fad0ae2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-and-building-on-the-HPC", + "title_depth": 2, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_4", + "next_title": "compiling_your_software_paragraph_6", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt new file mode 100644 index 00000000000..7ebde664878 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt @@ -0,0 +1,30 @@ +Compiling a sequential program in C +Go to the examples for chapter +Compiling and testing your software on the HPC and load the +foss module: +cd ~/examples/Compiling-and-testing-your-software-on-the-HPC +module load foss +We now list the directory and explore the contents of the "hello.c" +program: +$ ls -l +total 512 +-rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c +-rw-r--r-- 1 vsc40000 130 Sep 16 11:39 hello.pbs* +-rw-r--r-- 1 vsc40000 359 Sep 16 13:55 mpihello.c +-rw-r--r-- 1 vsc40000 304 Sep 16 13:55 mpihello.pbs +/* + * VSC : Flemish Supercomputing Centre + * Tutorial : Introduction to HPC + * Description: Print 500 numbers, whilst waiting 1 second in between + */ +#include "stdio.h" +int main( int argc, char *argv[] ) +{ + int i; + for (i=0; i<500; i++) + { + printf("Hello #%d\n", i); + fflush(stdout); + sleep(1); + } +} diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json new file mode 100644 index 00000000000..16942249583 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-sequential-program-in-C", + "title_depth": 3, + "directory": "compiling_your_software", + "links": { + "0": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc" + }, + "parent_title": "", + "previous_title": 
"compiling_your_software_paragraph_5", + "next_title": "compiling_your_software_paragraph_7", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt new file mode 100644 index 00000000000..1d58d0d6ae4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt @@ -0,0 +1,15 @@ +The "hello.c" program is a simple source file, written in C. It'll print +500 times "Hello #<num>", and waits one second between 2 printouts. +We first need to compile this C-file into an executable with the +gcc-compiler. +First, check the command line options for *"gcc" (GNU C-Compiler)*, then +we compile. the O2 option enables a moderate level of optimization when compiling the code. +It instructs the compiler to optimize the code for better performance without significantly increasing compilation time. 
+Finally, list the contents of the directory again: +$ gcc -help +$ gcc -O2 -o hello hello.c +$ ls -l +total 512 +-rwxrwxr-x 1 vsc40000 7116 Sep 16 11:43 hello* +-rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c +-rwxr-xr-x 1 vsc40000 130 Sep 16 11:39 hello.pbs* diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json new file mode 100644 index 00000000000..e5f3161c3f2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-sequential-program-in-C", + "title_depth": 3, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_6", + "next_title": "compiling_your_software_paragraph_8", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt new file mode 100644 index 00000000000..5ca5de1e6d4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt @@ -0,0 +1,19 @@ +A new file "hello" has been created. Note that this file has "execute" +rights, i.e., it is an executable. More often than not, calling gcc -- +or any other compiler for that matter -- will provide you with a list of +errors and warnings referring to mistakes the programmer made, such as +typos, syntax errors. 
You will have to correct them first in order to +make the code compile. Warnings pinpoint less crucial issues that may +relate to performance problems, using unsafe or obsolete language +features, etc. It is good practice to remove all warnings from a +compilation process, even if they seem unimportant so that a code change +that produces a warning does not go unnoticed. +Let's test this program on the local compute node, which is at your +disposal after the qsub --I command: +$ ./hello +Hello #0 +Hello #1 +Hello #2 +Hello #3 +Hello #4 +... diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json new file mode 100644 index 00000000000..942949951d1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-sequential-program-in-C", + "title_depth": 3, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_7", + "next_title": "compiling_your_software_paragraph_9", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt new file mode 100644 index 00000000000..28982d2bd95 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt @@ -0,0 +1,32 @@ +It seems to work, now run it on the HPC +qsub hello.pbs 
+Compiling a parallel program in C/MPI +cd ~/examples/Compiling-and-testing-your-software-on-the-HPC +List the directory and explore the contents of the "mpihello.c" +program: +$ ls -l +total 512 +total 512 +-rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c +-rw-r--r-- 1 vsc40000 130 Sep 16 11:39 hello.pbs* +-rw-r--r-- 1 vsc40000 359 Sep 16 13:55 mpihello.c +-rw-r--r-- 1 vsc40000 304 Sep 16 13:55 mpihello.pbs +/* + * VSC : Flemish Supercomputing Centre + * Tutorial : Introduction to HPC + * Description: Example program, to compile with MPI + */ +#include +#include +main(int argc, char **argv) +{ + int node, i, j; + float f; + MPI_Init(&argc,&argv); + MPI_Comm_rank(MPI_COMM_WORLD, &node); + + printf("Hello World from Node %d.\n", node); + for (i=0; i<=100000; i++) + f=i*2.718281828*i+i+i*3.141592654; + MPI_Finalize(); +} diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json new file mode 100644 index 00000000000..fe51e423a96 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-parallel-program-in-CMPI", + "title_depth": 3, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_8", + "next_title": "compiling_your_software_paragraph_10", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-cmpi" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt index 
9d84f459724..5df90a3dd7c 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt @@ -2,7 +2,7 @@ Generate a public/private key pair with OpenSSH A key pair might already be present in the default location inside your home directory. Therefore, we first check if a key is available with the "list short" ("ls") command: -$ ls ~/.ssh +ls ~/.ssh If a key-pair is already available, you would normally get: authorized_keys id_rsa id_rsa.pub known_hosts Otherwise, the command will show: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt index 3cde4395d81..d29d61d27d9 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt @@ -10,9 +10,4 @@ the passphrase. Be sure to never give away your private key, it is private and should stay private. You should not even copy it to one of your other machines, instead, you should create a new public/private key pair for each machine. -$ ssh-keygen -t rsa -b 4096 -Generating public/private rsa key pair. Enter file in which to save the -key (/home/user/.ssh/id_rsa): Enter passphrase (empty for no -passphrase): Enter same passphrase again: Your identification has been -saved in /home/user/.ssh/id_rsa. Your public key has been saved in -/home/user/.ssh/id_rsa.pub. 
+ssh-keygen -t rsa -b 4096 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt index e3ef2176f09..8e8429c1642 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt @@ -7,7 +7,7 @@ Agent admitted failure to sign using the key. Permission denied (publickey,gssapi-keyex,gssapi-with-mic). This could be fixed using the ssh-add command. You can include the new private keys' identities in your keyring with: -$ ssh-add +ssh-add tip Without extra options ssh-add adds any key located at $HOME/.ssh directory, but you can specify the private key location path as diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt index 93019fa1a6a..c227dbbb6e2 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt @@ -1,5 +1,5 @@ Check that your key is available from the keyring with: -$ ssh-add -l +ssh-add -l After these changes the key agent will keep your SSH key to connect to the clusters as usual. 
tip diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt index 3e588c709d4..4c8894438c9 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt @@ -14,7 +14,6 @@ First Time connection to the HPC infrastructure export LANG="en_US.UTF-8" ... - tip "tip: vi" To start entering text in vi: move to the place you want to start entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq" @@ -24,11 +23,10 @@ First Time connection to the HPC infrastructure or alternatively (if you are not comfortable with the Linux editors), again on your local machine: - $ echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile - $ echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile - $ echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile - $ echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile - + echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile You can now log out, open a new terminal/shell on your local machine and reconnect to the login node, and you should not get these warnings anymore. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt index 2664953ed0c..d09b69552ef 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt @@ -11,4 +11,4 @@ It's also possible to copy entire directories (and their contents) with the -r flag. For example, if we want to copy the local directory dataset to $VSC_SCRATCH, we can use the following command (assuming you've created the scratch symlink): -$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch +scp -r dataset vsc40000@login.hpc.ugent.be:scratch diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt index 51d39b548c3..532d57bb4a5 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt @@ -1,6 +1,6 @@ If you don't use the -r option to copy a directory, you will run into the following error: -$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch +$ scp dataset vsc40000@login.hpc.ugent.be:scratch dataset: not a regular file Using sftp The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt index 
4ae257101f1..1ef13b80c6f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt @@ -1,5 +1,5 @@ One easy way of starting a sftp session is -$ sftp vsc40000@login.hpc.ugent.be +sftp vsc40000@login.hpc.ugent.be Typical and popular commands inside an sftp session are: | | | |:--------------------------|:-------------------------------------------------------------------------------------| diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt index 94d5d9500a3..27ae3fb7bd4 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt @@ -1,7 +1,7 @@ First Time connection to the HPC infrastructure Connect Open up a terminal and enter the following command to connect to the HPC. -$ ssh vsc40000@login.hpc.ugent.be +ssh vsc40000@login.hpc.ugent.be Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command. 
The first time you make a connection to the login node, you will be diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt index 312fe885cb0..be01e09bba0 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt @@ -1,4 +1,4 @@ Permission denied (publickey,gssapi-keyex,gssapi-with-mic). In this case, use the -i option for the ssh command to specify the location of your private key. For example: -$ ssh -i /home/example/my_keys +ssh -i /home/example/my_keys diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt index 2c97d597425..a8c087f818b 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt @@ -6,7 +6,7 @@ Generate a public/private key pair with OpenSSH A key pair might already be present in the default location inside your home directory. 
Therefore, we first check if a key is available with the "list short" ("ls") command: -$ ls ~/.ssh +ls ~/.ssh If a key-pair is already available, you would normally get: authorized_keys id_rsa id_rsa.pub known_hosts Otherwise, the command will show: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt index 3cde4395d81..d29d61d27d9 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt @@ -10,9 +10,4 @@ the passphrase. Be sure to never give away your private key, it is private and should stay private. You should not even copy it to one of your other machines, instead, you should create a new public/private key pair for each machine. -$ ssh-keygen -t rsa -b 4096 -Generating public/private rsa key pair. Enter file in which to save the -key (/home/user/.ssh/id_rsa): Enter passphrase (empty for no -passphrase): Enter same passphrase again: Your identification has been -saved in /home/user/.ssh/id_rsa. Your public key has been saved in -/home/user/.ssh/id_rsa.pub. +ssh-keygen -t rsa -b 4096 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt index d204f4e4392..1069ebd9fbd 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt @@ -7,7 +7,7 @@ Agent admitted failure to sign using the key. Permission denied (publickey,gssapi-keyex,gssapi-with-mic). This could be fixed using the ssh-add command. 
You can include the new private keys' identities in your keyring with: -$ ssh-add +ssh-add tip Without extra options ssh-add adds any key located at $HOME/.ssh directory, but you can specify the private key location path as diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt index 8fd93f6b4f6..c880ee4a228 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt @@ -1,5 +1,5 @@ Check that your key is available from the keyring with: -$ ssh-add -l +ssh-add -l After these changes the key agent will keep your SSH key to connect to the clusters as usual. tip diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt index 3e588c709d4..4c8894438c9 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt @@ -14,7 +14,6 @@ First Time connection to the HPC infrastructure export LANG="en_US.UTF-8" ... - tip "tip: vi" To start entering text in vi: move to the place you want to start entering text with the arrow keys and type "i" to switch to insert mode. 
You can easily exit vi by entering: ""ESC" :wq" @@ -24,11 +23,10 @@ First Time connection to the HPC infrastructure or alternatively (if you are not comfortable with the Linux editors), again on your local machine: - $ echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile - $ echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile - $ echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile - $ echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile - + echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile You can now log out, open a new terminal/shell on your local machine and reconnect to the login node, and you should not get these warnings anymore. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt index 2664953ed0c..d09b69552ef 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt @@ -11,4 +11,4 @@ It's also possible to copy entire directories (and their contents) with the -r flag. 
For example, if we want to copy the local directory dataset to $VSC_SCRATCH, we can use the following command (assuming you've created the scratch symlink): -$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch +scp -r dataset vsc40000@login.hpc.ugent.be:scratch diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt index 51d39b548c3..532d57bb4a5 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt @@ -1,6 +1,6 @@ If you don't use the -r option to copy a directory, you will run into the following error: -$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch +$ scp dataset vsc40000@login.hpc.ugent.be:scratch dataset: not a regular file Using sftp The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt index 4ae257101f1..1ef13b80c6f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt @@ -1,5 +1,5 @@ One easy way of starting a sftp session is -$ sftp vsc40000@login.hpc.ugent.be +sftp vsc40000@login.hpc.ugent.be Typical and popular commands inside an sftp session are: | | | |:--------------------------|:-------------------------------------------------------------------------------------| diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt index d4c89b7e1c7..1e22cfc8b1f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt @@ -2,7 +2,7 @@ First Time connection to the HPC infrastructure Connect Open up a terminal and enter the following command to connect to the HPC. You can open a terminal by navigation to Applications and then Utilities in the finder and open Terminal.app, or enter Terminal in Spotlight Search. -$ ssh vsc40000@login.hpc.ugent.be +ssh vsc40000@login.hpc.ugent.be Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command. The first time you make a connection to the login node, you will be diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt index 6fa418464dd..f3f5ac6e775 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt @@ -4,4 +4,4 @@ private key somewhere else than the default location Permission denied (publickey,gssapi-keyex,gssapi-with-mic). In this case, use the -i option for the ssh command to specify the location of your private key. 
For example: -$ ssh -i /home/example/my_keys +ssh -i /home/example/my_keys diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt index b5ecfb93e88..69db57957dc 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt @@ -33,10 +33,9 @@ to the HPC cluster via the login node "login.hpc.ugent.be". login-node of the HPC. 10. To check you can now "Print the Working Directory" (pwd) and check the name of the computer, where you have logged in (hostname): - $ pwd + $ pwd /user/home/gent/vsc400/vsc40000 $ hostname -f gligar07.gastly.os - 11. For future PuTTY sessions, just select your saved session (i.e. "hpcugent") from the list, "Load" it and press "Open". 
From 6d04bbc7656406eaa1d00e0386ecfea76848b5e7 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 28 Aug 2024 15:45:41 +0200 Subject: [PATCH 132/152] add source-directory to metadata and verbose mode --- .../chatbot_parser.py | 34 +++++++++++-------- .../tps1/tps1_paragraph_1_metadata.json | 1 + .../tps1/tps1_paragraph_3_metadata.json | 1 + .../tps1_linux_paragraph_2.1_metadata.json | 1 + .../tps1_linux_paragraph_2.2_metadata.json | 1 + .../tps1_macos_paragraph_2.1_metadata.json | 1 + .../tps1_macos_paragraph_2.2_metadata.json | 1 + .../tps1_windows_paragraph_2.1_metadata.json | 1 + .../tps1_windows_paragraph_2.2_metadata.json | 1 + .../Subtitle-1/Subtitle-1_metadata.json | 1 + .../Subtitle-5-g/Subtitle-5-g_metadata.json | 1 + .../Subtitle-2-g/Subtitle-2-g_metadata.json | 1 + .../Subtitle-4-l&m_metadata.json | 1 + .../Subtitle-2-g/Subtitle-2-g_metadata.json | 1 + .../Subtitle-4-l&m_metadata.json | 1 + .../Subtitle-2-g/Subtitle-2-g_metadata.json | 1 + .../Subtitle-3-w/Subtitle-3-w_metadata.json | 1 + .../tests/test_full_script.py | 8 +++-- .../tests/test_write_metadata.py | 12 +++---- 19 files changed, 47 insertions(+), 23 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 2b23fb4e962..e4ed0009654 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -21,6 +21,7 @@ INCLUDE_LINKS_IN_PLAINTEXT = "INCLUDE_LINKS_IN_PLAINTEXT" SPLIT_ON_PARAGRAPHS = "SPLIT_ON_PARAGRAPHS" DEEP_DIRECTORIES = "DEEP_DIRECTORIES" +VERBOSE = "VERBOSE" # directories PARSED_MDS = "parsed_mds" @@ -67,6 +68,7 @@ WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE = "write_text_and_check_extra_message" # Metadata attributes +SOURCE_FILE = "source_file" MAIN_TITLE = "main_title" SUBTITLE = "subtitle" TITLE_DEPTH = "title_depth" @@ -207,7 +209,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): if '???' 
in curr_line: curr_line = re.sub(r'\?\?\?', "", curr_line) - # get rid of other markdown indicators (`, *, +, _) + # get rid of other indicators (`, *, +, _) if not in_code_block: backquotes = re.findall(r'`(.*?)`', curr_line) @@ -320,7 +322,7 @@ def split_on_titles(file, main_title, options): paragraphs_os_free_text[title] = current_paragraph # write metadata of previous file - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir, options[SOURCE_DIRECTORY] + '/' + main_title + '.md') # make a new title title = make_valid_title(line[title_level + 1:-1]) @@ -357,7 +359,7 @@ def split_on_titles(file, main_title, options): paragraphs_os_text[title] = current_paragraph else: paragraphs_os_free_text[title] = current_paragraph - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, curr_dirs[last_title_level]) + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, curr_dirs[last_title_level], options[SOURCE_DIRECTORY] + '/' + main_title + '.md') return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order @@ -407,7 +409,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # metadata title metadata_title = main_title - # TODO: define metadata data if split occurs on paragraphs and last_title and title_level are known (placeholder in place right now) + # define metadata data if split occurs on paragraphs and last_title and title_level are known (will be replaced later on in the process) if current_paragraph_number != -1: last_title_level = 4 last_dir = "PLACEHOLDER" @@ -467,7 +469,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, paragraphs_os_free_text[paragraph_title] = current_paragraph # write metadata of previous file - 
paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir) + paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir, source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md') subtitle_order.append(paragraph_title) # reset the current paragraph @@ -512,13 +514,13 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, paragraphs_os_text[paragraph_title] = current_paragraph else: paragraphs_os_free_text[paragraph_title] = current_paragraph - paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level]) + paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level], source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md') subtitle_order.append(paragraph_title) return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order -def write_metadata(main_title, subtitle, links, title_level, directory): +def write_metadata(main_title, subtitle, links, title_level, directory, source_file): """ Function that writes metadata about a text section to a dictionary @@ -527,10 +529,11 @@ def write_metadata(main_title, subtitle, links, title_level, directory): :param links: a list of links contained within the section :param title_level: the depth of the title of the section :param directory: the directory where the section will eventually be written (can either be generic or os-specific) + :param source_file: the source file that the section originates from :return paragraph_metadata: dictionary containing the metadata about the section """ - paragraph_metadata = {MAIN_TITLE: main_title, SUBTITLE: subtitle, TITLE_DEPTH: title_level, DIRECTORY: directory} + paragraph_metadata = {MAIN_TITLE: main_title, SUBTITLE: subtitle, SOURCE_FILE: 
source_file, TITLE_DEPTH: title_level, DIRECTORY: directory} if len(links) > 0: paragraph_metadata[LINKS] = {} @@ -918,7 +921,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or pass -def main(options, verbose=True): +def main(options): """ main function @@ -931,11 +934,10 @@ def main(options, verbose=True): MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text, INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext, DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not} - :param verbose: boolean indicating whether print statements from the main function should be print, only used when for testing :return: """ - if options[DEEP_DIRECTORIES] and verbose: + if options[DEEP_DIRECTORIES] and options[VERBOSE]: print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason @@ -963,7 +965,6 @@ def main(options, verbose=True): # for loops over all files for filename in filenames.keys(): - print("Processing " + filename) ################### define/reset loop specific variables ################### # boolean indicating whether the current file is part of the linux tutorial @@ -987,6 +988,9 @@ def main(options, verbose=True): ################### actually parse the md file ################### + if options[VERBOSE]: + print("Processing " + filename) + # create directories for the source markdown file for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, 
curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: os.makedirs(directory, exist_ok=True) @@ -1015,7 +1019,7 @@ def main(options, verbose=True): if os.path.exists(TEMP_JINJA_FILE): os.remove(TEMP_JINJA_FILE) - if verbose: + if options[VERBOSE]: print("Parsing finished successfully") @@ -1031,6 +1035,7 @@ def main(options, verbose=True): parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)") parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is enabled") + parser.add_argument("-v", "--verbose", action="store_true", help="Run the script with verbose output") args = parser.parse_args() @@ -1041,6 +1046,7 @@ def main(options, verbose=True): MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, MAX_TITLE_DEPTH: args.max_title_depth, INCLUDE_LINKS_IN_PLAINTEXT: args.links, - DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles} + DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles, + VERBOSE: args.verbose} main(options_dict) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json index 19e44fad91d..08c0b4e4973 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json @@ 
-1,6 +1,7 @@ { "main_title": "tps1", "subtitle": "Main-title", + "source_file": "tests/test_files/ftps/tps1.md", "title_depth": 1, "directory": "tps1", "links": { diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json index b4c98ff6465..2f1ea4dcd1f 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tps1", "subtitle": "Conclusion", + "source_file": "tests/test_files/ftps/tps1.md", "title_depth": 2, "directory": "tps1", "parent_title": "", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json index bac81ed87e3..208cb3472f4 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tps1", "subtitle": "OS-specific-sections", + "source_file": "tests/test_files/ftps/tps1.md", "title_depth": 2, "directory": "tps1", "parent_title": "Main-title", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json index 522265436ab..b975dfe4e03 100644 --- 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tps1", "subtitle": "Non-Windows-section", + "source_file": "tests/test_files/ftps/tps1.md", "title_depth": 3, "directory": "tps1", "parent_title": "OS-specific-sections", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json index 5d9ec163f99..9c605eb9004 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tps1", "subtitle": "OS-specific-sections", + "source_file": "tests/test_files/ftps/tps1.md", "title_depth": 2, "directory": "tps1", "parent_title": "Main-title", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json index 7b06f06efdd..e3ca81d7cc5 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tps1", "subtitle": "Non-Windows-section", + "source_file": 
"tests/test_files/ftps/tps1.md", "title_depth": 3, "directory": "tps1", "parent_title": "OS-specific-sections", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json index e8e50aa6c32..ab58c622b8c 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tps1", "subtitle": "OS-specific-sections", + "source_file": "tests/test_files/ftps/tps1.md", "title_depth": 2, "directory": "tps1", "parent_title": "Main-title", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json index 84ea6ad53f9..435c9e9c484 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tps1", "subtitle": "Windows-specific-section", + "source_file": "tests/test_files/ftps/tps1.md", "title_depth": 3, "directory": "tps1", "parent_title": "OS-specific-sections", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json index 9fdbce652bf..b7786c066a7 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tts1", "subtitle": "Subtitle-1", + "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 2, "directory": "tts1\\Main-title\\Subtitle-1", "parent_title": "Main-title", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json index b48bcaaa08c..eb5403804e2 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tts1", "subtitle": "Subtitle-5-g", + "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 2, "directory": "tts1\\Main-title\\Subtitle-5-g", "parent_title": "Main-title", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json index a2b68c8865e..f7330bec86d 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json +++ 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tts1", "subtitle": "Subtitle-2-g", + "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 2, "directory": "tts1\\Main-title\\Subtitle-2-g", "parent_title": "Main-title", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json index 537541e2cb0..a76f852c874 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tts1", "subtitle": "Subtitle-4-l&m", + "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 3, "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m", "parent_title": "Subtitle-2-g", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json index 6846da26b72..8b234c92fa6 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -1,6 +1,7 @@ { 
"main_title": "tts1", "subtitle": "Subtitle-2-g", + "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 2, "directory": "tts1\\Main-title\\Subtitle-2-g", "parent_title": "Main-title", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json index 4e167b116d2..732d309da81 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tts1", "subtitle": "Subtitle-4-l&m", + "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 3, "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m", "parent_title": "Subtitle-2-g", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json index c4620a94080..7a43426a85f 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tts1", "subtitle": "Subtitle-2-g", + "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 2, "directory": "tts1\\Main-title\\Subtitle-2-g", 
"parent_title": "Main-title", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json index aa4b6317ce6..4d7f494320d 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tts1", "subtitle": "Subtitle-3-w", + "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 3, "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-3-w", "parent_title": "Subtitle-2-g", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py index 61a6f3f1bdf..91605dec651 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py @@ -14,7 +14,8 @@ "MIN_PARAGRAPH_LENGTH": 160, "MAX_TITLE_DEPTH": 4, "INCLUDE_LINKS_IN_PLAINTEXT": False, - "DEEP_DIRECTORIES": False} + "DEEP_DIRECTORIES": False, + "VERBOSE": False} ), ("tests/test_files/ftts", "tests/test_files/ftts/actual", "tests/test_files/ftts/output", @@ -25,12 +26,13 @@ "MIN_PARAGRAPH_LENGTH": 160, "MAX_TITLE_DEPTH": 4, "INCLUDE_LINKS_IN_PLAINTEXT": False, - "DEEP_DIRECTORIES": True} + "DEEP_DIRECTORIES": True, + "VERBOSE": False} ) ]) def test_full_script_generated_directories(input_directory, actual_output_directory, expected_output_directory, options): # run the script - main(options, verbose=False) + main(options) # Compare directories and files for dirpath, dirnames, filenames in 
os.walk(expected_output_directory): diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py index 68f1772cb24..6c30fef7985 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py @@ -3,13 +3,13 @@ from chatbot_parser import write_metadata -@pytest.mark.parametrize("main_title,subtitle,links,title_level,directory,output", [ - ("", "", [], 1, "", {"main_title": "", "subtitle": "", "title_depth": 1, "directory": "", "parent_title": ""}), +@pytest.mark.parametrize("main_title,subtitle,links,title_level,directory,source_file,output", [ + ("", "", [], 1, "", "", {"source_file": "", "main_title": "", "subtitle": "", "title_depth": 1, "directory": "", "parent_title": ""}), ("A_very_good_main_title", "An_extremely_good_subtitle", ["the_first.link", "the_second.link"], 2, - os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), - {"main_title": "A_very_good_main_title", "subtitle": "An_extremely_good_subtitle", "title_depth": 2, + os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), "source", + {"source_file": "source", "main_title": "A_very_good_main_title", "subtitle": "An_extremely_good_subtitle", "title_depth": 2, "directory": os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), "parent_title": "An_awesome_parent_file", "links": {"0": "the_first.link", "1": "the_second.link"}}) ]) -def test_write_metadata(main_title, subtitle, links, title_level, directory, output): - assert write_metadata(main_title, subtitle, links, title_level, directory) == output +def test_write_metadata(main_title, subtitle, links, title_level, directory, source_file, output): + assert write_metadata(main_title, subtitle, links, title_level, directory, source_file) == output From 
f33cfb3b22feacf540944dc8812d5a55c59763d4 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 28 Aug 2024 16:57:24 +0200 Subject: [PATCH 133/152] added verbose mode --- .../chatbot_parser.py | 124 +++++++++++++++--- 1 file changed, 107 insertions(+), 17 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index e4ed0009654..60776fcb379 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -106,6 +106,9 @@ # Marker for comments for the bot INPUT_FOR_BOT = "INPUT_FOR_BOT" +# Standard strings for verbose output +LINE = "------------------------------------------------------------------------------------------------------\n" + ################### define functions ################### @@ -266,6 +269,10 @@ def split_on_titles(file, main_title, options): :return paragraphs_metadata: dictionary containing the metadata of each split section of text :return subtitle_order: list containing all encountered subtitles in order of appearance """ + + if options[VERBOSE]: + print("Splitting on titles\n") + # start of assuming we haven't encountered a title after_first_title = False @@ -302,15 +309,20 @@ def split_on_titles(file, main_title, options): # detect if-statements starting or ending on the current line in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len(re.findall(IF_MANGLED_PATTERNS[ENDIF], line)) + # detect codeblocks to make sure titles aren't detected in them + if '```' in line or (('
<pre><code>' in line) ^ ('</code></pre>
' in line)): + in_code_block = not in_code_block + if options[VERBOSE]: + if in_code_block: + print("Detected start of a codeblock, not registering titles") + else: + print("Detected end of codeblock, registering titles again") + # only split up if current line is in a fully non-os-specific section if in_if_statement == 0: title_level = check_for_title(line, in_code_block, curr_dirs, options) - # detect codeblocks to make sure titles aren't detected in them - if '```' in line or (('
<pre><code>' in line) ^ ('</code></pre>
' in line)): - in_code_block = not in_code_block - # line is a title with a maximum depth of 4 if title_level > 0: if after_first_title: @@ -318,8 +330,12 @@ def split_on_titles(file, main_title, options): # write text of previous file if previous_contained_if: paragraphs_os_text[title] = current_paragraph + if options[VERBOSE]: + print("Saved os-specific chunk with temporary title: " + title + "\n") else: paragraphs_os_free_text[title] = current_paragraph + if options[VERBOSE]: + print("Saved generic chunk with title: " + title + "\n") # write metadata of previous file paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir, options[SOURCE_DIRECTORY] + '/' + main_title + '.md') @@ -357,8 +373,12 @@ def split_on_titles(file, main_title, options): # write dictionaries for the last file if previous_contained_if: paragraphs_os_text[title] = current_paragraph + if options[VERBOSE]: + print("Saved os-specific chunk with temporary title: " + title + "\n") else: paragraphs_os_free_text[title] = current_paragraph + if options[VERBOSE]: + print("Saved generic chunk with title: " + title + "\n") paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, curr_dirs[last_title_level], options[SOURCE_DIRECTORY] + '/' + main_title + '.md') return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order @@ -377,6 +397,10 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, :return paragraphs_metadata: dictionary containing the metadata of each split section of text :return subtitle_order: list containing all encountered subtitles in order of appearance """ + + if options[VERBOSE]: + print("Splitting on paragraphs\n") + # start of assuming we are not in a code_block in_code_block = False @@ -435,23 +459,33 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # detect whether the current line is in a list if 
re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', line): # beginning of a list entry in_list = True - # print("List entry found") + if options[VERBOSE]: + print("First line of new list entry found, not starting new paragraphs: " + line[:-1]) elif re.search(r'^\s{2,}.+$', line) and in_list: # middle of a list entry pass elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list: # line(s) between list entries pass + elif in_list: + if options[VERBOSE]: + print("List ended, starting new paragraphs again") + in_list = False else: in_list = False + # detect codeblocks to make sure titles aren't detected in them + if '```' in line or (('
<pre><code>' in line) ^ ('</code></pre>
' in line)): + in_code_block = not in_code_block + if options[VERBOSE]: + if in_code_block: + print("Detected start of a codeblock, not starting new paragraphs") + else: + print("Detected end of codeblock, starting new paragraphs again") + # only split up if current line is in a fully non-os-specific section if in_if_statement == 0: title_level = check_for_title(line, in_code_block, curr_dirs, options) - # detect codeblocks to make sure titles aren't detected in them - if '```' in line or (('
<pre><code>' in line) ^ ('</code></pre>
' in line)): - in_code_block = not in_code_block - # check whether a new paragraph should be started if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= options[MIN_PARAGRAPH_LENGTH] and not in_code_block and not in_list: @@ -465,8 +499,12 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # write text of previous file if previous_contained_if: paragraphs_os_text[paragraph_title] = current_paragraph + if options[VERBOSE]: + print("Saved os-specific chunk with temporary title: " + paragraph_title + "\n") else: paragraphs_os_free_text[paragraph_title] = current_paragraph + if options[VERBOSE]: + print("Saved generic chunk with title: " + paragraph_title + "\n") # write metadata of previous file paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir, source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md') @@ -512,8 +550,12 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # write dictionaries for the last file if previous_contained_if: paragraphs_os_text[paragraph_title] = current_paragraph + if options[VERBOSE]: + print("Saved os-specific chunk with temporary title: " + paragraph_title + "\n") else: paragraphs_os_free_text[paragraph_title] = current_paragraph + if options[VERBOSE]: + print("Saved generic chunk with title: " + paragraph_title + "\n") paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level], source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md') subtitle_order.append(paragraph_title) @@ -557,6 +599,9 @@ def jinja_parser(filename, copy_location, options): # YAML file location yml_file_path = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, EXTRA_DIR, 'gent.yml') + if options[VERBOSE]: + print("Reading YAML file from location: " + yml_file_path) + # Read the 
YAML file with open(yml_file_path, 'r') as yml_file: words_dict = yaml.safe_load(yml_file) @@ -569,6 +614,9 @@ def jinja_parser(filename, copy_location, options): } combined_context = {**words_dict, **additional_context} + if options[VERBOSE]: + print("Mangling OS-specific if-statements") + # Mangle the OS-related if-statements mangle_ifs(copy_location, filename, options) @@ -578,6 +626,9 @@ def jinja_parser(filename, copy_location, options): template = templateEnv.get_template(filename) rendered_content = template.render(combined_context) + if options[VERBOSE]: + print("jinja parsing finished\nWriting to location: " + copy_location) + # Save the rendered content to a new file with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file: output_file.write(rendered_content) @@ -601,7 +652,7 @@ def load_macros(name): return readfile.read() -def mangle_os_ifs(line, is_os): +def mangle_os_ifs(line, is_os, options): """ function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure. We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements. 
@@ -612,6 +663,7 @@ def mangle_os_ifs(line, is_os): NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if OS_IF: in an os-if OS_IF_IN_OS_IF: in an os-if nested in an os-if + :param options: dictionary containing the options given by the user :return line: the modified line with mangled os-related if-statements """ @@ -640,6 +692,8 @@ def mangle_os_ifs(line, is_os): # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these if endif_match: if is_os in (OS_IF, OS_IF_IN_OS_IF): + if options[VERBOSE]: + print("OS-specific endif statement found in line: " + line[:-1]) line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling added_length += 2 * len(IF_MANGLED_PART) if is_os == OS_IF: @@ -651,6 +705,8 @@ def mangle_os_ifs(line, is_os): elif if_match: if if_os_match: + if options[VERBOSE]: + print("OS-specific if statement found in line: " + line[:-1]) line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling added_length += 2 * len(IF_MANGLED_PART) if is_os == OS_IF: @@ -665,6 +721,8 @@ def mangle_os_ifs(line, is_os): elif else_match: if is_os in (OS_IF, OS_IF_IN_OS_IF): + if options[VERBOSE]: + print("OS-specific else statement found in line: " + line[:-1]) line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling added_length += 2 * len(IF_MANGLED_PART) @@ -688,7 +746,7 @@ def mangle_ifs(directory, filename, options): with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES, filename), 'w') as write_file: with open(directory, 'r') as read_file: for line in read_file: - new_line, is_os = mangle_os_ifs(line, is_os) + new_line, is_os = mangle_os_ifs(line, is_os, options) write_file.write(new_line) @@ -733,6 +791,9 @@ def write_generic_file(title, 
paragraphs_text, paragraphs_metadata, title_order, filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) os.makedirs(filepath, exist_ok=True) + if options[VERBOSE]: + print("Writing generic section " + title + " to filepath: " + str(filepath)) + write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options, is_linux_tutorial) else: # don't write empty files @@ -911,6 +972,9 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) os.makedirs(filepath, exist_ok=True) + if options[VERBOSE]: + print("Writing os-specific section " + os_subtitle + " to filepath: " + str(filepath)) + # write to files write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options, is_linux_tutorial) else: @@ -933,10 +997,14 @@ def main(options): MIN_PARAGRAPH_LENGTH: integer representing the minimum length of a paragraph, MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text, INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext, - DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not} + DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not, + VERBOSE: enable or disable verbose mode} :return: """ + if options[VERBOSE]: + print("Running chatbot parser with options: " + str(options)) + if options[DEEP_DIRECTORIES] and options[VERBOSE]: print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") @@ -976,9 +1044,10 @@ def main(options): # variable that keeps track of the directories that are used to write in at different levels root_dir_generic = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR) - root_dir_os_specific_linux = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, LINUX) - root_dir_os_specific_windows = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS) - root_dir_os_specific_macos = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, MACOS) + root_dir_os_specific = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR) + root_dir_os_specific_linux = os.path.join(root_dir_os_specific, LINUX) + root_dir_os_specific_windows = os.path.join(root_dir_os_specific, WINDOWS) + root_dir_os_specific_macos = os.path.join(root_dir_os_specific, MACOS) # variable for the main title (needed for reference links) main_title = filename[:-3] @@ -989,18 +1058,31 @@ def main(options): ################### actually parse the md file ################### if options[VERBOSE]: - print("Processing " + filename) + print(LINE + "Processing " + filename) + print("Location: " + filenames[filename]) + print("\nMaking directories:") # create directories for the source markdown file - for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: + for directory in [root_dir_generic, root_dir_os_specific, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, 
os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: + if options[VERBOSE]: + print(directory) os.makedirs(directory, exist_ok=True) + if options[VERBOSE]: + print("\nParsing the sourcefile with jinja") + # process the jinja macros jinja_parser(filename, copy_file, options) + if options[VERBOSE]: + print("\nSplitting the file for the first time (split in sufficiently small generic sections and large os-specific chunks)") + # split the text in paragraphs paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options) + if options[VERBOSE]: + print("\nFurther splitting os-specific chunks and writing generic and os-specific sections to files with metadata") + # for every section, either make the whole section generic, or create an os-specific file for each OS for i, subtitle in enumerate(subtitle_order): @@ -1012,6 +1094,14 @@ def main(options): else: split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata, options, is_linux_tutorial) + if options[VERBOSE]: + print("\nFinished processing " + filename) + + if options[VERBOSE]: + print(LINE + "Cleaning up directories:") + print(os.path.join(options[DESTINATION_DIRECTORY], COPIES)) + print(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES)) + print(os.path.join(options[DESTINATION_DIRECTORY], LINUX_TUTORIAL)) # clean up temporary directories and files shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True) shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True) From 3227f1939ef3933d0a8fcc22835239021abae0aa Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 29 Aug 2024 09:17:04 +0200 Subject: [PATCH 134/152] Added limitation on lists --- 
scripts/HPC_chatbot_preprocessor/README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index b3bce665973..1795ee71554 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -164,6 +164,18 @@ Comments can be written in such a way that the script will keep them as input fo ``` +This will be reworked to + +``` +your comment for the bot +``` + +in the final output. + ### Long filepaths Due to the nature of this script, it can generate large directories with very long names if `deep_directories` is enabled. Depending on the operating system, this can cause problems with filepaths being to long, resulting in files not being able to open. A possible fix for this is to make sure the filepath to where the script is located is not too long. Another solution is lowering the `max_title_depth` or disabling `deep_directories`. + +### Markdown lists + +The parser detects lists and does not split them into multiple paragraphs. It recognises lists that use the markers `-`, `+` or `*`, as well as lists indexed with numbers or letters (one letter per list entry). It can handle list entries that are spread out over multiple lines if the continuation lines are indented by at least two spaces. It can also handle list entries consisting of multiple paragraphs in this way, as long as the indentation is kept. 
From 67aed53662656f95c7a9b718cf372d1ca5349283 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 29 Aug 2024 10:46:56 +0200 Subject: [PATCH 135/152] fix for non os-specific if-statement not being recognised --- scripts/HPC_chatbot_preprocessor/README.md | 11 +++++ .../chatbot_parser.py | 34 +++++++++++++- .../generic/account/account_paragraph_10.txt | 8 ++-- .../account_paragraph_10_metadata.json | 5 +- .../account_paragraph_12_metadata.json | 1 + .../account/account_paragraph_1_metadata.json | 1 + .../account/account_paragraph_2_metadata.json | 1 + .../account/account_paragraph_3_metadata.json | 1 + .../generic/account/account_paragraph_8.txt | 11 +++-- .../account/account_paragraph_8_metadata.json | 1 + .../connecting/connecting_paragraph_10.txt | 24 ++++++++++ .../connecting_paragraph_10_metadata.json} | 7 +-- .../connecting/connecting_paragraph_14.txt | 7 --- .../connecting_paragraph_14_metadata.json | 14 ------ .../connecting/connecting_paragraph_15.txt | 19 +++----- .../connecting_paragraph_15_metadata.json | 6 +-- .../connecting/connecting_paragraph_16.txt | 11 +++++ .../connecting_paragraph_16_metadata.json | 16 +++++++ .../connecting_paragraph_1_metadata.json | 1 + .../connecting/connecting_paragraph_2.txt | 2 +- .../connecting_paragraph_2_metadata.json | 1 + .../connecting/connecting_paragraph_3.txt | 1 - .../connecting_paragraph_3_metadata.json | 1 + .../connecting/connecting_paragraph_6.txt | 2 - .../connecting_paragraph_6_metadata.json | 1 + .../connecting/connecting_paragraph_7.txt | 1 - .../connecting_paragraph_7_metadata.json | 1 + .../connecting/connecting_paragraph_8.txt | 3 +- .../connecting_paragraph_8_metadata.json | 1 + .../connecting/connecting_paragraph_9.txt | 46 ++++++++----------- .../connecting_paragraph_9_metadata.json | 1 + .../account/account_linux_paragraph_11.1.txt | 3 ++ ...account_linux_paragraph_11.1_metadata.json | 1 + .../account_linux_paragraph_4.1_metadata.json | 1 + .../account_linux_paragraph_5.1_metadata.json | 1 + 
.../account_linux_paragraph_5.2_metadata.json | 1 + .../account_linux_paragraph_5.3_metadata.json | 1 + .../account_linux_paragraph_5.4_metadata.json | 1 + .../account_linux_paragraph_5.5_metadata.json | 1 + .../account_linux_paragraph_6.1_metadata.json | 1 + .../account_linux_paragraph_7.1_metadata.json | 1 + .../account_linux_paragraph_7.2_metadata.json | 1 + .../account/account_linux_paragraph_9.1.txt | 9 ++-- .../account_linux_paragraph_9.1_metadata.json | 1 + .../connecting_linux_paragraph_10.1.txt | 33 ------------- ...necting_linux_paragraph_10.1_metadata.json | 11 ----- .../connecting_linux_paragraph_11.1.txt | 43 ++++++++++++++--- ...necting_linux_paragraph_11.1_metadata.json | 5 +- .../connecting_linux_paragraph_12.1.txt | 18 +++----- ...necting_linux_paragraph_12.1_metadata.json | 14 +++--- .../connecting_linux_paragraph_13.1.txt | 20 ++++---- ...necting_linux_paragraph_13.1_metadata.json | 10 ++-- ...xt => connecting_linux_paragraph_13.2.txt} | 0 ...ecting_linux_paragraph_13.2_metadata.json} | 5 +- ...xt => connecting_linux_paragraph_13.3.txt} | 0 ...ecting_linux_paragraph_13.3_metadata.json} | 5 +- ...xt => connecting_linux_paragraph_13.4.txt} | 0 ...ecting_linux_paragraph_13.4_metadata.json} | 5 +- ...xt => connecting_linux_paragraph_13.5.txt} | 0 ...ecting_linux_paragraph_13.5_metadata.json} | 5 +- ...xt => connecting_linux_paragraph_13.6.txt} | 0 ...ecting_linux_paragraph_13.6_metadata.json} | 5 +- .../connecting_linux_paragraph_14.1.txt | 10 ++++ ...necting_linux_paragraph_14.1_metadata.json | 12 +++++ ...nnecting_linux_paragraph_5.1_metadata.json | 1 + ...nnecting_linux_paragraph_5.2_metadata.json | 1 + .../account/account_macos_paragraph_11.1.txt | 3 ++ ...account_macos_paragraph_11.1_metadata.json | 1 + .../account_macos_paragraph_4.1_metadata.json | 1 + .../account_macos_paragraph_5.1_metadata.json | 1 + .../account_macos_paragraph_5.2_metadata.json | 1 + .../account_macos_paragraph_5.3_metadata.json | 1 + 
.../account_macos_paragraph_5.4_metadata.json | 1 + .../account_macos_paragraph_5.5_metadata.json | 1 + .../account_macos_paragraph_6.1_metadata.json | 1 + .../account_macos_paragraph_7.1_metadata.json | 1 + .../account_macos_paragraph_7.2_metadata.json | 1 + .../account/account_macos_paragraph_9.1.txt | 9 ++-- .../account_macos_paragraph_9.1_metadata.json | 1 + .../connecting_macos_paragraph_10.1.txt | 33 ------------- .../connecting_macos_paragraph_11.1.txt | 43 ++++++++++++++--- ...necting_macos_paragraph_11.1_metadata.json | 5 +- .../connecting_macos_paragraph_12.1.txt | 18 +++----- ...necting_macos_paragraph_12.1_metadata.json | 14 +++--- .../connecting_macos_paragraph_12.2.txt | 17 ------- ...necting_macos_paragraph_12.2_metadata.json | 11 ----- .../connecting_macos_paragraph_13.1.txt | 25 +++++----- ...necting_macos_paragraph_13.1_metadata.json | 8 +++- .../connecting_macos_paragraph_13.2.txt | 20 ++++++-- ...necting_macos_paragraph_13.2_metadata.json | 7 +-- ...xt => connecting_macos_paragraph_13.3.txt} | 0 ...ecting_macos_paragraph_13.3_metadata.json} | 5 +- ...xt => connecting_macos_paragraph_13.4.txt} | 0 ...ecting_macos_paragraph_13.4_metadata.json} | 5 +- ...xt => connecting_macos_paragraph_13.5.txt} | 0 ...ecting_macos_paragraph_13.5_metadata.json} | 5 +- ...xt => connecting_macos_paragraph_13.6.txt} | 0 ...ecting_macos_paragraph_13.6_metadata.json} | 5 +- .../connecting_macos_paragraph_14.1.txt | 15 ++++++ ...necting_macos_paragraph_14.1_metadata.json | 12 +++++ .../connecting_macos_paragraph_14.2.txt | 3 ++ ...necting_macos_paragraph_14.2_metadata.json | 12 +++++ ...nnecting_macos_paragraph_5.1_metadata.json | 1 + ...nnecting_macos_paragraph_5.2_metadata.json | 1 + .../account_windows_paragraph_11.1.txt | 3 ++ ...count_windows_paragraph_11.1_metadata.json | 1 + ...ccount_windows_paragraph_4.1_metadata.json | 1 + ...ccount_windows_paragraph_4.2_metadata.json | 1 + .../account/account_windows_paragraph_4.3.txt | 8 ++++ 
...ccount_windows_paragraph_4.3_metadata.json | 4 ++ ...ccount_windows_paragraph_4.4_metadata.json | 1 + ...ccount_windows_paragraph_6.1_metadata.json | 1 + ...ccount_windows_paragraph_6.2_metadata.json | 1 + ...ccount_windows_paragraph_6.3_metadata.json | 1 + .../account/account_windows_paragraph_9.1.txt | 9 ++-- ...ccount_windows_paragraph_9.1_metadata.json | 1 + .../connecting_windows_paragraph_10.1.txt | 5 -- ...cting_windows_paragraph_10.1_metadata.json | 11 ----- .../connecting_windows_paragraph_11.1.txt | 29 ++++-------- ...cting_windows_paragraph_11.1_metadata.json | 11 +++-- .../connecting_windows_paragraph_12.1.txt | 22 +++++++++ ...ting_windows_paragraph_12.1_metadata.json} | 5 +- ... => connecting_windows_paragraph_12.2.txt} | 0 ...cting_windows_paragraph_12.2_metadata.json | 12 +++++ ... => connecting_windows_paragraph_12.3.txt} | 0 ...ting_windows_paragraph_12.3_metadata.json} | 5 +- .../connecting_windows_paragraph_4.1.txt | 1 + ...ecting_windows_paragraph_4.1_metadata.json | 1 + 128 files changed, 533 insertions(+), 355 deletions(-) create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt rename scripts/HPC_chatbot_preprocessor/parsed_mds/{os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json => generic/connecting/connecting_paragraph_10_metadata.json} (53%) delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.2.txt => connecting_linux_paragraph_13.2.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.2_metadata.json => connecting_linux_paragraph_13.2_metadata.json} (61%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.3.txt => connecting_linux_paragraph_13.3.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.4_metadata.json => connecting_linux_paragraph_13.3_metadata.json} (61%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.4.txt => connecting_linux_paragraph_13.4.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.3_metadata.json => connecting_linux_paragraph_13.4_metadata.json} (61%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.5.txt => connecting_linux_paragraph_13.5.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.5_metadata.json => connecting_linux_paragraph_13.5_metadata.json} (61%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.6.txt => connecting_linux_paragraph_13.6.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.6_metadata.json => connecting_linux_paragraph_13.6_metadata.json} (65%) create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt create mode 
100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.3.txt => connecting_macos_paragraph_13.3.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.3_metadata.json => connecting_macos_paragraph_13.3_metadata.json} (61%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.4.txt => connecting_macos_paragraph_13.4.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.4_metadata.json => connecting_macos_paragraph_13.4_metadata.json} (61%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.5.txt => connecting_macos_paragraph_13.5.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.5_metadata.json => connecting_macos_paragraph_13.5_metadata.json} (61%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.6.txt => connecting_macos_paragraph_13.6.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.6_metadata.json => connecting_macos_paragraph_13.6_metadata.json} (65%) create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/{connecting_windows_paragraph_11.2_metadata.json => connecting_windows_paragraph_12.1_metadata.json} (63%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/{connecting_windows_paragraph_11.2.txt => connecting_windows_paragraph_12.2.txt} (100%) create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/{connecting_windows_paragraph_11.3.txt => connecting_windows_paragraph_12.3.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/{connecting_windows_paragraph_11.3_metadata.json => connecting_windows_paragraph_12.3_metadata.json} (63%) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 1795ee71554..27c1bf3fea6 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ 
b/scripts/HPC_chatbot_preprocessor/README.md @@ -145,6 +145,17 @@ endif This will also result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it. +### Non OS-related if-statements + +Due to the way jinja parses the sourcefiles, the script slightly alters non os-specific if-statements as well. It expects if-statements of the following form: + +``` +{%- if site == gent %} +{% if site != (gent or brussel) %} +``` + +All spaces and the dash are optional. City names don't need to be fully lowercase since the parser will capitalize them properly anyway. + ### html syntax The input shouldn't contain any html syntax. While some failsafes are in place, the script isn't made with the use case of handling html syntax in mind. diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 60776fcb379..3129ccaf566 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -620,6 +620,12 @@ def jinja_parser(filename, copy_location, options): # Mangle the OS-related if-statements mangle_ifs(copy_location, filename, options) + if options[VERBOSE]: + print("Altering other if-statements to parse properly") + + # Alter the other if-statements + alter_ifs(filename, options) + # Use Jinja2 to replace the macros template_loader = ChoiceLoader([FileSystemLoader(searchpath=[os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), options[SOURCE_DIRECTORY], os.path.join(options[SOURCE_DIRECTORY], RETURN_DIR)]), FunctionLoader(load_macros)]) templateEnv = Environment(loader=template_loader) @@ -627,7 +633,7 @@ def jinja_parser(filename, copy_location, options): rendered_content = template.render(combined_context) if options[VERBOSE]: - print("jinja parsing finished\nWriting to location: " + copy_location) + print("jinja parsing finished\nWriting jinja-parsed file to location: " + copy_location) # Save 
the rendered content to a new file with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file: @@ -750,6 +756,32 @@ def mangle_ifs(directory, filename, options): write_file.write(new_line) +def alter_ifs(filename, options): + """ + Function that further adapts the if-statements in a file and writes it to a location where the jinja parser will use it. + This is because the jinja parser doesn't seem to be able to handle statements like {% site == gent %} with context {'site': 'Gent'} in this case. + These statements get changed to {% site == 'Gent' %} in this function. + + :param filename: the filename of the file to be transformed + :param options: dictionary containing the options given by the user + :return: + """ + + with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES, filename), 'r') as read_file: + content = read_file.read() + + pattern = r'(\{%-?\s?[a-zA-Z\s]*?[!=]=\s?\(?)([a-zA-Z\s]+(?:\sor\s[a-zA-Z\s]+)*)(\)?\s?%})' + content = re.sub(pattern, + lambda match: (f"{match.group(1)}" + + " or ".join([f"'{city.strip().capitalize()}'" for city in match.group(2).split(" or ")]) + + f"{match.group(3)}" + ), + content) + + with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES, filename), 'w') as write_file: + write_file.write(content) + + def make_valid_title(title): """ function that makes sure all titles can be used as valid filenames diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt index f486b9b1348..7b0a39279e4 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt @@ -1,3 +1,7 @@ +After you have uploaded your public key you will receive an e-mail with +a link to confirm your e-mail address. 
After confirming your e-mail +address the VSC staff will review and if applicable approve your +account. Welcome e-mail Within one day, you should receive a Welcome e-mail with your VSC account details. @@ -12,7 +16,3 @@ Kind regards, -- The VSC administrators Now, you can start using the HPC. You can always look up your VSC id later by visiting . -Adding multiple SSH public keys (optional) -In case you are connecting from different computers to the login nodes, -it is advised to use separate SSH public keys to do so. You should -follow these steps. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json index 4b5b5202d1c..e417029c16f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json @@ -1,11 +1,12 @@ { "main_title": "account", - "subtitle": "Adding-multiple-SSH-public-keys-(optional)", + "subtitle": "Welcome-e-mail", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "", "previous_title": "account_paragraph_9", "next_title": "account_paragraph_11", "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/account/#adding-multiple-ssh-public-keys-optional" + "reference_link": "https://docs.hpc.ugent.be/account/#welcome-e-mail" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json index a5df035df49..e43e729aa74 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json @@ 
-1,6 +1,7 @@ { "main_title": "account", "subtitle": "Computation-Workflow-on-the-HPC", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 2, "directory": "account", "parent_title": "", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json index 726ce9f94fa..cdba091d7df 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Getting-ready-to-request-an-account", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 2, "directory": "account", "links": { diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json index 257f886c6e0..0b22e2986a0 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Getting-ready-to-request-an-account", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 2, "directory": "account", "links": { diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json index b94f233779b..bd2f73195a6 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": 
"How-do-SSH-keys-work", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt index 125b566419a..6c5695dfff3 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt @@ -6,8 +6,9 @@ Select "UGent" in the dropdown box and optionally select "Save my preference" and "permanently". Click "Confirm" You will now be taken to the authentication page of your institute. -After you log in using your UGent login and password, you will be asked to -upload the file that contains your public key, i.e., the file -"id_rsa.pub" which you have generated earlier. Make sure that your -public key is actually accepted for upload, because if it is in a wrong -format, wrong type or too short, then it will be refused. +You will now have to log in with CAS using your UGent account. +You either have a login name of maximum 8 characters, or a (non-UGent) +email address if you are an external user. In case of problems with your +UGent password, please visit: . After +logging in, you may be requested to share your information. Click "Yes, +continue". 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json index 6d186b6ff46..6a77c48dbd1 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Applying-for-the-account", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 2, "directory": "account", "parent_title": "", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt new file mode 100644 index 00000000000..5c715d218a1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt @@ -0,0 +1,24 @@ +You can exit the connection at anytime by entering: +$ exit +logout +Connection to login.hpc.ugent.be closed. + tip "tip: Setting your Language right" + You may encounter a warning message similar to the following one during connecting: + perl: warning: Setting locale failed. + perl: warning: Please check that your locale settings: + LANGUAGE = (unset), + LC_ALL = (unset), + LC_CTYPE = "UTF-8", + LANG = (unset) + are supported and installed on your system. + perl: warning: Falling back to the standard locale ("C"). + or any other error message complaining about the locale. + This means that the correct "locale" has not yet been properly specified on your local machine. 
Try: + LANG= + LC_COLLATE="C" + LC_CTYPE="UTF-8" + LC_MESSAGES="C" + LC_MONETARY="C" + LC_NUMERIC="C" + LC_TIME="C" + LC_ALL= diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json similarity index 53% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json index 4c6e5477119..96a1f9cee80 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", + "parent_title": "", "previous_title": "connecting_paragraph_9", "next_title": "connecting_paragraph_11", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure" + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt deleted file mode 100644 index df00d4ed2a4..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt +++ /dev/null @@ -1,7 +0,0 @@ -Fast file transfer for large datasets -See the section on 
rsync in chapter 5 of the Linux intro manual. -Changing login nodes -It can be useful to have control over which login node you are on. However, when you connect to the HPC (High-Performance Computing) system, you are directed to a random login node, which might not be the one where you already have an active session. To address this, there is a way to manually switch your active login node. -For instance, if you want to switch to the login node named gligar07.gastly.os, you can use the following command while you are connected to the gligar08.gastly.os login node on the HPC: -ssh gligar07.gastly.os -This is also possible the other way around. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json deleted file mode 100644 index 0543efa4083..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Changing-login-nodes", - "title_depth": 2, - "directory": "connecting", - "links": { - "0": "https://docs.hpc.ugent.be/connecting/../linux-tutorial/uploading_files/#copying-faster-with-rsync" - }, - "parent_title": "", - "previous_title": "connecting_paragraph_13", - "next_title": "connecting_paragraph_15", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt index b2197618647..df00d4ed2a4 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt @@ -1,12 +1,7 @@ 
-If you want to find out which login host you are connected to, you can use the hostname command. -$ hostname -gligar07.gastly.os -$ ssh gligar08.gastly.os -$ hostname -gligar08.gastly.os - -Rather than always starting a new session on the HPC, you can also use a terminal multiplexer like screen or tmux. -These can make sessions that 'survives' across disconnects. -You can find more information on how to use these tools here (or on other online sources): -- screen -- tmux \ No newline at end of file +Fast file transfer for large datasets +See the section on rsync in chapter 5 of the Linux intro manual. +Changing login nodes +It can be useful to have control over which login node you are on. However, when you connect to the HPC (High-Performance Computing) system, you are directed to a random login node, which might not be the one where you already have an active session. To address this, there is a way to manually switch your active login node. +For instance, if you want to switch to the login node named gligar07.gastly.os, you can use the following command while you are connected to the gligar08.gastly.os login node on the HPC: +ssh gligar07.gastly.os +This is also possible the other way around. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json index d23146ed79f..ff9c22397d1 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json @@ -1,15 +1,15 @@ { "main_title": "connecting", "subtitle": "Changing-login-nodes", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "links": { - "0": "https://www.howtogeek.com/662422/how-to-use-linuxs-screen-command/", - "1": "https://www.howtogeek.com/671422/how-to-use-tmux-on-linux-and-why-its-better-than-screen/" + "0": "https://docs.hpc.ugent.be/connecting/../linux-tutorial/uploading_files/#copying-faster-with-rsync" }, "parent_title": "", "previous_title": "connecting_paragraph_14", - "next_title": null, + "next_title": "connecting_paragraph_16", "OS": "generic", "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt new file mode 100644 index 00000000000..dd4f3269fb5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt @@ -0,0 +1,11 @@ +If you want to find out which login host you are connected to, you can use the hostname command. +$ hostname +gligar07.gastly.os +$ ssh gligar08.gastly.os +$ hostname +gligar08.gastly.os +Rather than always starting a new session on the HPC, you can also use a terminal multiplexer like screen or tmux. +These can make sessions that 'survives' across disconnects. 
+You can find more information on how to use these tools here (or on other online sources): +- screen +- tmux \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json new file mode 100644 index 00000000000..623be877f5b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json @@ -0,0 +1,16 @@ +{ + "main_title": "connecting", + "subtitle": "Changing-login-nodes", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, + "directory": "connecting", + "links": { + "0": "https://www.howtogeek.com/662422/how-to-use-linuxs-screen-command/", + "1": "https://www.howtogeek.com/671422/how-to-use-tmux-on-linux-and-why-its-better-than-screen/" + }, + "parent_title": "", + "previous_title": "connecting_paragraph_15", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json index ef0bc5473b0..783e60c1ab5 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Connecting-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 1, "directory": "connecting", "links": { diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt index b150c8fbb28..49c4572f3b2 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt @@ -7,7 +7,7 @@ networks, and from (most) Belgian commercial internet providers. All other IP domains are blocked by default. If you are connecting from an IP address that is not allowed direct access, you have the following options to get access to VSC login nodes: -- Use an VPN connection to connect to UGent the network (recommended). +- Use an VPN connection to connect to UGent the network (recommended). See for more information. - Whitelist your IP address automatically by accessing and log in with your UGent account. - While this web connection is active new SSH sessions can be diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json index 39ee53fcf0b..10f3e042d9a 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Connection-restrictions", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "parent_title": "", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt index 31dd6463266..db490973b7f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt @@ -2,7 +2,6 @@ Trying to establish an SSH connection from an IP address that does not adhere to these restrictions will result in an immediate failure to connect, with an error message like: ssh_exchange_identification: read: Connection reset by peer - First Time connection to the HPC infrastructure The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json index 471e6bfcbf2..e30467d0799 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "links": { diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt index 472991adada..862e6952252 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt @@ -2,14 +2,12 @@ Congratulations, you're on the HPC infrastructure now! To find out where you have landed you can print the current working directory: $ pwd /user/home/gent/vsc400/vsc40000 - Your new private home directory is "/user/home/gent/vsc400/vsc40000". 
Here you can create your own subdirectory structure, copy and prepare your applications, compile and test them and submit your jobs on the HPC. $ cd /apps/gent/tutorials $ ls Intro-HPC/ - This directory currently contains all training material for the Introduction to the HPC. More relevant training material to work with the HPC can always be added later in this directory. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json index 1c7ae8ed267..66b2a89fbb1 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "parent_title": "", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt index 35996afe4da..aa590b9b269 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt @@ -15,7 +15,6 @@ $ tree -L 2 |-- example.pbs '-- example.sh 9 directories, 5 files - This directory contains: 1. This HPC Tutorial (in either a Mac, Linux or Windows version). 2. 
An examples subdirectory, containing all the examples that you need in this diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json index 709753e4dc4..6e3f90fbe8a 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "parent_title": "", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt index 096c74c1372..634df6034b1 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt @@ -1,5 +1,4 @@ -$ cd examples - +cd examples tip Typing cd ex followed by tab (the Tab-key) will generate the cd examples command. 
Command-line completion (also tab completion) is a common feature of the bash command diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json index 0241e0bd6b9..074e7e891ce 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "links": { diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt index 5a634e6bddc..ad2fee7457f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt @@ -1,27 +1,19 @@ -$ cp -r /apps/gent/tutorials/Intro-HPC/examples ~/ - -You can exit the connection at anytime by entering: -$ exit -logout -Connection to login.hpc.ugent.be closed. - - tip "tip: Setting your Language right" - You may encounter a warning message similar to the following one during connecting: - perl: warning: Setting locale failed. - perl: warning: Please check that your locale settings: - LANGUAGE = (unset), - LC_ALL = (unset), - LC_CTYPE = "UTF-8", - LANG = (unset) - are supported and installed on your system. - perl: warning: Falling back to the standard locale ("C"). - or any other error message complaining about the locale. - This means that the correct "locale" has not yet been properly specified on your local machine. 
Try: - LANG= - LC_COLLATE="C" - LC_CTYPE="UTF-8" - LC_MESSAGES="C" - LC_MONETARY="C" - LC_NUMERIC="C" - LC_TIME="C" - LC_ALL= +cp -r /apps/gent/tutorials/Intro-HPC/examples ~/ +Go to your home directory, check your own private examples directory, ...Ā and start working. +cd +ls -l +Upon connecting you will see a login message containing your last login time stamp and a basic overview of the current cluster utilisation. +Last login: Thu Mar 18 13:15:09 2021 from gligarha02.gastly.os + STEVIN HPC-UGent infrastructure status on Mon, 19 Feb 2024 10:00:01 + cluster - full - free - part - total - running - queued + nodes nodes free nodes jobs jobs + ------------------------------------------------------------------------- + skitty 39 0 26 68 1839 5588 + joltik 6 0 1 10 29 18 + doduo 22 0 75 128 1397 11933 + accelgor 4 3 2 9 18 1 + donphan 0 0 16 16 16 13 + gallade 2 0 5 16 19 136 +For a full view of the current loads and queues see: +https://hpc.ugent.be/clusterstate/ +Updates on current system status and planned maintenance can be found on https://www.ugent.be/hpc/en/infrastructure/status diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json index 40b04f24e9f..bd1d462e614 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "parent_title": "", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt index b2734cc9f89..dfc59211792 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt @@ -1,4 +1,7 @@ Adding multiple SSH public keys (optional) +In case you are connecting from different computers to the login nodes, +it is advised to use separate SSH public keys to do so. You should +follow these steps. 1. Create a new public/private SSH key pair from the new computer. Repeat the process described in sectionĀ Generate a public/private key pair with OpenSSH. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json index 72b9f92061c..ffdeaf550e0 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Adding-multiple-SSH-public-keys-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Applying-for-the-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json index 52e1569a8a7..bcc0552177d 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "How-do-SSH-keys-work", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json index 4636f13a4b4..7654a65253a 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Test-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json index ca9c4c7dc1d..32f1120307f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Test-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json index d902f6a0838..722ba1a2ad4 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json index 1edae26d97b..4f65f6ebf36 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json index 29affc0335e..468fb5d0938 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": 
"Generate-a-publicprivate-key-pair-with-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json index acf12bc0a7d..fb82c40a7d7 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Using-an-SSH-agent-(optional", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json index b6b1e052345..4214d6cb321 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json index 35466be5b56..de9700c7a5b 100644 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt index a9059b224bf..815c414e059 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt @@ -1,6 +1,7 @@ Applying for the account +After you log in using your UGent login and password, you will be asked to +upload the file that contains your public key, i.e., the file +"id_rsa.pub" which you have generated earlier. Make sure that your +public key is actually accepted for upload, because if it is in a wrong +format, wrong type or too short, then it will be refused. This file has been stored in the directory "~/.ssh/". -After you have uploaded your public key you will receive an e-mail with -a link to confirm your e-mail address. After confirming your e-mail -address the VSC staff will review and if applicable approve your -account. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json index 21988388723..31c14d853b3 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Applying-for-the-account", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 2, "directory": "account", "parent_title": "account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt deleted file mode 100644 index 4c8894438c9..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt +++ /dev/null @@ -1,33 +0,0 @@ -First Time connection to the HPC infrastructure - A locale is a set of parameters that defines the user's language, country and - any special variant preferences that the user wants to see in their user - interface. Usually a locale identifier consists of at least a language - identifier and a region identifier. - Open the .bashrc on your local machine with your favourite editor and - add the following lines: - - $ nano ~/.bashrc - ... - export LANGUAGE="en_US.UTF-8" - export LC_ALL="en_US.UTF-8" - export LC_CTYPE="en_US.UTF-8" - export LANG="en_US.UTF-8" - ... - - tip "tip: vi" - To start entering text in vi: move to the place you want to start - entering text with the arrow keys and type "i" to switch to insert mode. 
You can easily exit vi by entering: ""ESC" :wq" - To exit vi without saving your changes, enter ""ESC":q!" - - - or alternatively (if you are not comfortable with the Linux editors), - again on your local machine: - - echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile - echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile - echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile - echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile - - You can now log out, open a new terminal/shell on your local machine and - reconnect to the login node, and you should not get these warnings anymore. - \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json deleted file mode 100644 index 364c81834cf..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "previous_title": "connecting_paragraph_9", - "next_title": "connecting_paragraph_11", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt index d872c89a0f8..1d912924535 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt @@ -1,6 +1,37 @@ -Transfer Files to/from the HPC -Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. -The preferred way to transfer files is by using an scp or sftp via the -secure OpenSSH protocol. ships with an implementation of OpenSSH, so you -don't need to install any third-party software to use it. Just open a -terminal window and jump in! +First Time connection to the HPC infrastructure + A locale is a set of parameters that defines the user's language, country and + any special variant preferences that the user wants to see in their user + interface. Usually a locale identifier consists of at least a language + identifier and a region identifier. + Note + If you try to set a non-supported locale, then it will be automatically + set to the default. Currently the default is en_US.UFT-8 or en_US, + depending on whether your originally (non-supported) locale was UTF-8 or not. + Open the .bashrc on your local machine with your favourite editor and + add the following lines: + + $ nano ~/.bashrc + ... + export LANGUAGE="en_US.UTF-8" + export LC_ALL="en_US.UTF-8" + export LC_CTYPE="en_US.UTF-8" + export LANG="en_US.UTF-8" + ... + + tip "tip: vi" + To start entering text in vi: move to the place you want to start + entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq" + To exit vi without saving your changes, enter ""ESC":q!" 
+ + + or alternatively (if you are not comfortable with the Linux editors), + again on your local machine: + + echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile + + You can now log out, open a new terminal/shell on your local machine and + reconnect to the login node, and you should not get these warnings anymore. + \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json index 420f73742f5..ef14b084e5f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", - "subtitle": "Transfer-Files-tofrom-the-HPC", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "parent_title": "Connecting-to-the-HPC-infrastructure", "previous_title": "connecting_paragraph_10", "next_title": "connecting_paragraph_12", "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#transfer-files-tofrom-the-hpc" + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt index 8d0031fcca9..d872c89a0f8 100644 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt @@ -1,12 +1,6 @@ -Transfer Files tofrom the HPC -Using scp -Secure copy or SCP is a tool (command) for securely transferring files between a local -host (= your computer) and a remote host (the HPC). It is based on the -Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e., -copy) command, but can copy files to or from remote machines. -It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if -you have symlinks to them in your home directory. See -the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux - for how to do this. -Open an additional terminal window and check that you're working on your -local machine. +Transfer Files to/from the HPC +Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. +The preferred way to transfer files is by using an scp or sftp via the +secure OpenSSH protocol. ships with an implementation of OpenSSH, so you +don't need to install any third-party software to use it. Just open a +terminal window and jump in! 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json index 19eba778d90..081156a5d16 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json @@ -1,14 +1,12 @@ { "main_title": "connecting", - "subtitle": "Using-scp", - "title_depth": 3, + "subtitle": "Transfer-Files-tofrom-the-HPC", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "links": { - "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" - }, + "parent_title": "Connecting-to-the-HPC-infrastructure", "previous_title": "connecting_paragraph_11", - "next_title": "connecting_linux_paragraph_12.2", + "next_title": "connecting_paragraph_13", "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#transfer-files-tofrom-the-hpc" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt index a0496edfb14..8d0031fcca9 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt @@ -1,10 +1,12 @@ Transfer Files tofrom the HPC -Using a GUI -If you prefer a GUI to transfer files back and 
forth to the HPC, you can -use your file browser. Open your file browser and press -++"Ctrl"+"l"++ -This should open up a address bar where you can enter a URL. -Alternatively, look for the "connect to server" option in your file -browsers menu. -Enter: sftp://vsc40000@login.hpc.ugent.be/ and press enter. -You should now be able to browse files on the HPC in your file browser. +Using scp +Secure copy or SCP is a tool (command) for securely transferring files between a local +host (= your computer) and a remote host (the HPC). It is based on the +Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e., +copy) command, but can copy files to or from remote machines. +It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if +you have symlinks to them in your home directory. See +the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux + for how to do this. +Open an additional terminal window and check that you're working on your +local machine. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json index d634a356654..988c10028d8 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json @@ -1,11 +1,15 @@ { "main_title": "connecting", - "subtitle": "Using-a-GUI", + "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" + }, "previous_title": "connecting_paragraph_12", - "next_title": "connecting_paragraph_14", + "next_title": "connecting_linux_paragraph_13.2", "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-a-gui" + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json similarity index 61% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json index 0b3a3418c55..43affa4e36c 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_linux_paragraph_12.1", - "next_title": "connecting_linux_paragraph_12.3", + "previous_title": "connecting_linux_paragraph_13.1", + "next_title": "connecting_linux_paragraph_13.3", "OS": "linux", "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json 
similarity index 61% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json index 5a401911cab..ccc74bb5b94 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_linux_paragraph_12.3", - "next_title": "connecting_linux_paragraph_12.5", + "previous_title": "connecting_linux_paragraph_13.2", + "next_title": "connecting_linux_paragraph_13.4", "OS": "linux", "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json similarity index 61% rename from 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json index 5624749ede8..9ffcc4121f4 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_linux_paragraph_12.2", - "next_title": "connecting_linux_paragraph_12.4", + "previous_title": "connecting_linux_paragraph_13.3", + "next_title": "connecting_linux_paragraph_13.5", "OS": "linux", "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json similarity index 61% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json rename to 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json index a479f66e7e0..8e3b4056b6b 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "Using-sftp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_linux_paragraph_12.4", - "next_title": "connecting_linux_paragraph_12.6", + "previous_title": "connecting_linux_paragraph_13.4", + "next_title": "connecting_linux_paragraph_13.6", "OS": "linux", "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json similarity index 65% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json index 
9c744fd5133..1fc868ffab2 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Using-sftp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", @@ -8,8 +9,8 @@ "0": "", "1": "" }, - "previous_title": "connecting_linux_paragraph_12.5", - "next_title": "connecting_linux_paragraph_12.7", + "previous_title": "connecting_linux_paragraph_13.5", + "next_title": "connecting_linux_paragraph_13.7", "OS": "linux", "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt new file mode 100644 index 00000000000..a0496edfb14 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt @@ -0,0 +1,10 @@ +Transfer Files tofrom the HPC +Using a GUI +If you prefer a GUI to transfer files back and forth to the HPC, you can +use your file browser. Open your file browser and press +++"Ctrl"+"l"++ +This should open up a address bar where you can enter a URL. +Alternatively, look for the "connect to server" option in your file +browsers menu. +Enter: sftp://vsc40000@login.hpc.ugent.be/ and press enter. +You should now be able to browse files on the HPC in your file browser. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json new file mode 100644 index 00000000000..e3c48fe4829 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "Using-a-GUI", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_paragraph_13", + "next_title": "connecting_paragraph_15", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-a-gui" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json index 05996eb5df2..55613bca732 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Connect", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json index 85a826e41a3..21b63518804 100644 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Connect", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt index b2734cc9f89..dfc59211792 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt @@ -1,4 +1,7 @@ Adding multiple SSH public keys (optional) +In case you are connecting from different computers to the login nodes, +it is advised to use separate SSH public keys to do so. You should +follow these steps. 1. Create a new public/private SSH key pair from the new computer. Repeat the process described in sectionĀ Generate a public/private key pair with OpenSSH. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json index dd8b3400419..d9d3c33f876 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Adding-multiple-SSH-public-keys-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Applying-for-the-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json index 33d083958b9..5400014a85c 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "How-do-SSH-keys-work", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json index c75d6aede58..028d9d25f7f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Test-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json index 7f6c80a32f6..dfec6f6fd5a 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Test-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json index 7c0f0d2a04d..5a10e780b45 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json index 346108200ac..8da465c1f24 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json index 25baa1e073f..9d6f7b1a741 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json index b8931a423d3..17a34a2f80b 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": 
"Using-an-SSH-agent-(optional", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json index c43391b146e..f9b6c751fd4 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json index 519b58bb151..072a43cb3e4 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt index d11380c2519..5a5a52da062 100644 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt @@ -1,11 +1,12 @@ Applying for the account +After you log in using your UGent login and password, you will be asked to +upload the file that contains your public key, i.e., the file +"id_rsa.pub" which you have generated earlier. Make sure that your +public key is actually accepted for upload, because if it is in a wrong +format, wrong type or too short, then it will be refused. This file has been stored in the directory "~/.ssh/". tip As ".ssh" is an invisible directory, the Finder will not show it by default. The easiest way to access the folder, is by pressing ++cmd+shift+g++ (or ++cmd+shift+"."++), which will allow you to enter the name of a directory, which you would like to open in Finder. Here, type "~/.ssh" and press enter. -After you have uploaded your public key you will receive an e-mail with -a link to confirm your e-mail address. After confirming your e-mail -address the VSC staff will review and if applicable approve your -account. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json index 6b6e8c72703..86c8c2048bf 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Applying-for-the-account", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 2, "directory": "account", "parent_title": "account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt deleted file mode 100644 index 4c8894438c9..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt +++ /dev/null @@ -1,33 +0,0 @@ -First Time connection to the HPC infrastructure - A locale is a set of parameters that defines the user's language, country and - any special variant preferences that the user wants to see in their user - interface. Usually a locale identifier consists of at least a language - identifier and a region identifier. - Open the .bashrc on your local machine with your favourite editor and - add the following lines: - - $ nano ~/.bashrc - ... - export LANGUAGE="en_US.UTF-8" - export LC_ALL="en_US.UTF-8" - export LC_CTYPE="en_US.UTF-8" - export LANG="en_US.UTF-8" - ... - - tip "tip: vi" - To start entering text in vi: move to the place you want to start - entering text with the arrow keys and type "i" to switch to insert mode. 
You can easily exit vi by entering: ""ESC" :wq" - To exit vi without saving your changes, enter ""ESC":q!" - - - or alternatively (if you are not comfortable with the Linux editors), - again on your local machine: - - echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile - echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile - echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile - echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile - - You can now log out, open a new terminal/shell on your local machine and - reconnect to the login node, and you should not get these warnings anymore. - \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt index d872c89a0f8..1d912924535 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt @@ -1,6 +1,37 @@ -Transfer Files to/from the HPC -Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. -The preferred way to transfer files is by using an scp or sftp via the -secure OpenSSH protocol. ships with an implementation of OpenSSH, so you -don't need to install any third-party software to use it. Just open a -terminal window and jump in! +First Time connection to the HPC infrastructure + A locale is a set of parameters that defines the user's language, country and + any special variant preferences that the user wants to see in their user + interface. Usually a locale identifier consists of at least a language + identifier and a region identifier. 
+ Note + If you try to set a non-supported locale, then it will be automatically + set to the default. Currently the default is en_US.UFT-8 or en_US, + depending on whether your originally (non-supported) locale was UTF-8 or not. + Open the .bashrc on your local machine with your favourite editor and + add the following lines: + + $ nano ~/.bashrc + ... + export LANGUAGE="en_US.UTF-8" + export LC_ALL="en_US.UTF-8" + export LC_CTYPE="en_US.UTF-8" + export LANG="en_US.UTF-8" + ... + + tip "tip: vi" + To start entering text in vi: move to the place you want to start + entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq" + To exit vi without saving your changes, enter ""ESC":q!" + + + or alternatively (if you are not comfortable with the Linux editors), + again on your local machine: + + echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile + + You can now log out, open a new terminal/shell on your local machine and + reconnect to the login node, and you should not get these warnings anymore. 
+ \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json index 1425455ade8..323292b910e 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", - "subtitle": "Transfer-Files-tofrom-the-HPC", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "parent_title": "Connecting-to-the-HPC-infrastructure", "previous_title": "connecting_paragraph_10", "next_title": "connecting_paragraph_12", "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#transfer-files-tofrom-the-hpc" + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt index 8d0031fcca9..d872c89a0f8 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt @@ -1,12 +1,6 @@ -Transfer Files tofrom the HPC -Using scp -Secure copy or SCP is a tool (command) for securely transferring files between a local -host (= your computer) and a remote host (the HPC). It is based on the -Secure Shell (SSH) protocol. 
The scp command is the equivalent of the cp (i.e., -copy) command, but can copy files to or from remote machines. -It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if -you have symlinks to them in your home directory. See -the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux - for how to do this. -Open an additional terminal window and check that you're working on your -local machine. +Transfer Files to/from the HPC +Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. +The preferred way to transfer files is by using an scp or sftp via the +secure OpenSSH protocol. ships with an implementation of OpenSSH, so you +don't need to install any third-party software to use it. Just open a +terminal window and jump in! diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json index 332e6ed2996..8a420f36c2b 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json @@ -1,14 +1,12 @@ { "main_title": "connecting", - "subtitle": "Using-scp", - "title_depth": 3, + "subtitle": "Transfer-Files-tofrom-the-HPC", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "links": { - "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" - }, + "parent_title": "Connecting-to-the-HPC-infrastructure", "previous_title": 
"connecting_paragraph_11", - "next_title": "connecting_macos_paragraph_12.2", + "next_title": "connecting_paragraph_13", "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#transfer-files-tofrom-the-hpc" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt deleted file mode 100644 index f1da0677a67..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt +++ /dev/null @@ -1,17 +0,0 @@ -$ hostname - -If you're still using the terminal that is connected to the HPC, close the -connection by typing "exit" in the terminal window. -For example, we will copy the (local) file "localfile.txt" to your -home directory on the HPC cluster. We first generate a small dummy -"localfile.txt", which contains the word "Hello". Use your own VSC -account, which is something like "vsc40000". Don't forget the colon (:) at the -end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your -local filesystem. You can even specify where to save the file on the -remote filesystem by putting a path after the colon. -$ echo "Hello" > localfile.txt -$ ls -l -... 
--rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt -$ scp localfile.txt vsc40000@login.hpc.ugent.be: -localfile.txt 100% 6 0.0KB/s 00:00 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json deleted file mode 100644 index d86cdd989ac..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_12.1", - "next_title": "connecting_macos_paragraph_12.3", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt index 20a4acb40a8..8d0031fcca9 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt @@ -1,15 +1,12 @@ Transfer Files tofrom the HPC -Using a GUI (Cyberduck) -Cyberduck is a graphical alternative to the scp command. It can be -installed from . -This is the one-time setup you will need to do before connecting: -1. After starting Cyberduck, the Bookmark tab will show up. To add a - new bookmark, click on the "+" sign on the bottom left of the - window. A new window will open. -2. In the drop-down menu on top, select "SFTP (SSH File Transfer Protocol)". -3. 
In the "Server" field, type in login.hpc.ugent.be. In the "Username" field, type in - your VSC account id (this looks like vsc40000). -4. Select the location of your SSH private key in the "SSH Private Key" field. -5. Finally, type in a name for the bookmark in the "Nickname" field and - close the window by pressing on the red circle in the top left - corner of the window. +Using scp +Secure copy or SCP is a tool (command) for securely transferring files between a local +host (= your computer) and a remote host (the HPC). It is based on the +Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e., +copy) command, but can copy files to or from remote machines. +It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if +you have symlinks to them in your home directory. See +the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux + for how to do this. +Open an additional terminal window and check that you're working on your +local machine. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json index bd02ed8502f..9ec843ff0aa 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json @@ -1,11 +1,15 @@ { "main_title": "connecting", - "subtitle": "Using-a-GUI-(Cyberduck)", + "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" + }, "previous_title": "connecting_paragraph_12", "next_title": "connecting_macos_paragraph_13.2", "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt index 1d20edf411f..f1da0677a67 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt @@ -1,3 +1,17 @@ -To open the connection, click on the "Bookmarks" icon (which -resembles an open book) and double-click on the bookmark you just -created. 
+$ hostname + +If you're still using the terminal that is connected to the HPC, close the +connection by typing "exit" in the terminal window. +For example, we will copy the (local) file "localfile.txt" to your +home directory on the HPC cluster. We first generate a small dummy +"localfile.txt", which contains the word "Hello". Use your own VSC +account, which is something like "vsc40000". Don't forget the colon (:) at the +end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your +local filesystem. You can even specify where to save the file on the +remote filesystem by putting a path after the colon. +$ echo "Hello" > localfile.txt +$ ls -l +... +-rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt +$ scp localfile.txt vsc40000@login.hpc.ugent.be: +localfile.txt 100% 6 0.0KB/s 00:00 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json index 344ff690d54..dc57de365bf 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", - "subtitle": "Using-a-GUI-(Cyberduck)", + "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", "previous_title": "connecting_macos_paragraph_13.1", - "next_title": "connecting_paragraph_14", + "next_title": "connecting_macos_paragraph_13.3", "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" } \ No newline at end of file diff 
--git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json similarity index 61% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json index 4fcc42d2337..5a4623c650d 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_12.2", - "next_title": "connecting_macos_paragraph_12.4", + "previous_title": "connecting_macos_paragraph_13.2", + "next_title": "connecting_macos_paragraph_13.4", "OS": "macos", "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json similarity index 61% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json index 757b533cf8d..54b3fe19d58 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_12.3", - "next_title": "connecting_macos_paragraph_12.5", + "previous_title": "connecting_macos_paragraph_13.3", + "next_title": "connecting_macos_paragraph_13.5", "OS": "macos", "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt similarity index 
100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json similarity index 61% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json index d18c7c7deb5..0b9ba08e3b1 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "Using-sftp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_12.4", - "next_title": "connecting_macos_paragraph_12.6", + "previous_title": "connecting_macos_paragraph_13.4", + "next_title": "connecting_macos_paragraph_13.6", "OS": "macos", "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt 
rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json similarity index 65% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json index a8a4f2a3bab..fe899ad9dbc 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Using-sftp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", @@ -8,8 +9,8 @@ "0": "", "1": "" }, - "previous_title": "connecting_macos_paragraph_12.5", - "next_title": "connecting_macos_paragraph_12.7", + "previous_title": "connecting_macos_paragraph_13.5", + "next_title": "connecting_macos_paragraph_13.7", "OS": "macos", "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt new file mode 100644 index 00000000000..20a4acb40a8 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt @@ -0,0 +1,15 @@ +Transfer Files tofrom the HPC +Using 
a GUI (Cyberduck) +Cyberduck is a graphical alternative to the scp command. It can be +installed from . +This is the one-time setup you will need to do before connecting: +1. After starting Cyberduck, the Bookmark tab will show up. To add a + new bookmark, click on the "+" sign on the bottom left of the + window. A new window will open. +2. In the drop-down menu on top, select "SFTP (SSH File Transfer Protocol)". +3. In the "Server" field, type in login.hpc.ugent.be. In the "Username" field, type in + your VSC account id (this looks like vsc40000). +4. Select the location of your SSH private key in the "SSH Private Key" field. +5. Finally, type in a name for the bookmark in the "Nickname" field and + close the window by pressing on the red circle in the top left + corner of the window. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json new file mode 100644 index 00000000000..694b7682aa9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "Using-a-GUI-(Cyberduck)", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_paragraph_13", + "next_title": "connecting_macos_paragraph_14.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt new file mode 100644 index 
00000000000..1d20edf411f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt @@ -0,0 +1,3 @@ +To open the connection, click on the "Bookmarks" icon (which +resembles an open book) and double-click on the bookmark you just +created. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json new file mode 100644 index 00000000000..e32b1ab4c58 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "Using-a-GUI-(Cyberduck)", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_macos_paragraph_14.1", + "next_title": "connecting_paragraph_15", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json index e17629a55f3..85b088b0e8c 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Connect", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": 
"First-Time-connection-to-the-HPC-infrastructure", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json index 5c1d808739c..047d5863361 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Connect", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt index 0863009f290..ca00a8a0f65 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt @@ -1,4 +1,7 @@ Adding multiple SSH public keys (optional) +In case you are connecting from different computers to the login nodes, +it is advised to use separate SSH public keys to do so. You should +follow these steps. 1. Create a new public/private SSH key pair from Putty. Repeat the process described in sectionĀ Generate a public/private key pair. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json index eb4dd3b3a57..4614c053f2c 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Adding-multiple-SSH-public-keys-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Applying-for-the-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json index ce74735c538..7dc9b50fbdd 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Get-PuTTY-A-free-telnetSSH-client", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json index 9616b41452a..773acaabf23 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Generating-a-publicprivate-key-pair", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt index de5d164bb7a..b082d381a64 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt @@ -27,3 +27,11 @@ Start PuTTYgen.exe it and follow these steps: "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh") with the buttons "Save public key" and "Save private key". We recommend using the name "id_rsa.pub" for the public key, and "id_rsa.ppk" for the private key. +6. Finally, save an "OpenSSH" version of your private key (in + particular for later "X2Go" usage, see x2go) by entering the + "Conversions" menu and selecting "Export OpenSSH key" (do not select the + "force new file format" variant). Save the file in the same location + as in the previous step with filename "id_rsa". (If there is no + "Conversions" menu, you must update your "puttygen" version. If you + want to do this conversion afterwards, you can start with loading an + existing "id_rsa.ppk" and only do this conversions export.) 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json index 06b6e998c08..d803aeadb25 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json @@ -1,9 +1,13 @@ { "main_title": "account", "subtitle": "Generating-a-publicprivate-key-pair", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "" + }, "previous_title": "account_windows_paragraph_4.2", "next_title": "account_windows_paragraph_4.4", "OS": "windows", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json index fba810e7299..ebd55060657 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Generating-a-publicprivate-key-pair", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json index 69771b48c86..5fd697066b6 100644 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json index 24670708070..46808447a10 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json index d47ad3bd215..e33d002d248 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": 
"Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt index 90c17263cf5..9fd23612756 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt @@ -1,7 +1,8 @@ Applying for the account +After you log in using your UGent login and password, you will be asked to +upload the file that contains your public key, i.e., the file +"id_rsa.pub" which you have generated earlier. Make sure that your +public key is actually accepted for upload, because if it is in a wrong +format, wrong type or too short, then it will be refused. This file should have been stored in the directory "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh" -After you have uploaded your public key you will receive an e-mail with -a link to confirm your e-mail address. After confirming your e-mail -address the VSC staff will review and if applicable approve your -account. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json index d01ac9c3c16..87cda41283f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Applying-for-the-account", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 2, "directory": "account", "parent_title": "account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt deleted file mode 100644 index aaf5a585ebd..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt +++ /dev/null @@ -1,5 +0,0 @@ -First Time connection to the HPC infrastructure - A locale is a set of parameters that defines the user's language, country and - any special variant preferences that the user wants to see in their user - interface. Usually a locale identifier consists of at least a language - identifier and a region identifier. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json deleted file mode 100644 index 45c2bd2d90e..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "previous_title": "connecting_paragraph_9", - "next_title": "connecting_paragraph_11", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt index a4f00ba7a5f..5aa8ca03374 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt @@ -1,20 +1,9 @@ -Transfer Files to/from the HPC -Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. -WinSCP -To transfer files to and from the cluster, we recommend the use of -WinSCP, a graphical file management tool which can transfer files using -secure protocols such as SFTP and SCP. WinSCP is freely available from -. -To transfer your files using WinSCP, -1. 
Open the program -2. The "Login" menu is shown automatically (if it is closed, click "New Session" to open it again). Fill in the necessary fields under "Session" - 1. Click "New Site". - 2. Enter "login.hpc.ugent.be" in the "Host name" field. - 3. Enter your "vsc-account" in the "User name" field. - 4. Select "SCP" as the "file" protocol. - 5. Note that the password field remains empty. - 6. Click "Advanced...". - 7. Click "SSH > Authentication". - 8. Select your private key in the field "Private key file". -3. Press the "Save" button, to save the session under "Session > Sites" for future access. -4. Finally, when clicking on "Login", you will be asked for your key passphrase. +First Time connection to the HPC infrastructure + A locale is a set of parameters that defines the user's language, country and + any special variant preferences that the user wants to see in their user + interface. Usually a locale identifier consists of at least a language + identifier and a region identifier. + Note + If you try to set a non-supported locale, then it will be automatically + set to the default. Currently the default is en_US.UFT-8 or en_US, + depending on whether your originally (non-supported) locale was UTF-8 or not. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json index d9fbc64790a..d4b02dbc9fb 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", - "subtitle": "WinSCP", - "title_depth": 3, + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "parent_title": "Connecting-to-the-HPC-infrastructure", "previous_title": "connecting_paragraph_10", - "next_title": "connecting_windows_paragraph_11.2", + "next_title": "connecting_paragraph_12", "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#first-time-connection-to-the-hpc-infrastructure" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt new file mode 100644 index 00000000000..67e5e454852 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt @@ -0,0 +1,22 @@ +Transfer Files to/from the HPC +Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. 
At the end of a job, you might want to transfer some files back. +WinSCP +To transfer files to and from the cluster, we recommend the use of +WinSCP, a graphical file management tool which can transfer files using +secure protocols such as SFTP and SCP. WinSCP is freely available from +. +To transfer your files using WinSCP, +1. Open the program +2. The "Login" menu is shown automatically (if it is closed, click "New Session" to open it again). Fill in the necessary fields under "Session" + 1. Click "New Site". + 2. Enter "login.hpc.ugent.be" in the "Host name" field. + 3. Enter your "vsc-account" in the "User name" field. + 4. Select "SCP" as the "file" protocol. + 5. Note that the password field remains empty. + + 6. Click "Advanced...". + 7. Click "SSH > Authentication". + 8. Select your private key in the field "Private key file". +3. Press the "Save" button, to save the session under "Session > Sites" for future access. +4. Finally, when clicking on "Login", you will be asked for your key passphrase. 
+ diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json similarity index 63% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json index 65055dc0764..a4bbaee0f59 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "WinSCP", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "previous_title": "connecting_windows_paragraph_11.1", - "next_title": "connecting_windows_paragraph_11.3", + "previous_title": "connecting_paragraph_11", + "next_title": "connecting_windows_paragraph_12.2", "OS": "windows", "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json new file mode 100644 index 00000000000..80a8ef763a1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "WinSCP", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "previous_title": "connecting_windows_paragraph_12.1", + "next_title": "connecting_windows_paragraph_12.3", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json similarity index 63% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json index 
dd628f8e8cd..07760730d56 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "WinSCP", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "previous_title": "connecting_windows_paragraph_11.2", - "next_title": "connecting_paragraph_12", + "previous_title": "connecting_windows_paragraph_12.2", + "next_title": "connecting_paragraph_13", "OS": "windows", "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt index 69db57957dc..e45f4e63b85 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt @@ -4,6 +4,7 @@ You've generated a public/private key pair with PuTTYgen and have an approved account on the VSC clusters. The next step is to setup the connection to (one of) the HPC. In the screenshots, we show the setup for user +"vsc20167" to the HPC cluster via the login node "login.hpc.ugent.be". 1. 
Start the PuTTY executable putty.exe in your directory C:\Program Files (x86)\PuTTY and the configuration screen will pop diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json index ef4de8bd8e4..8b6b6f698d1 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Open-a-Terminal", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", From 9e297b18ef9827a20a1283053ad49c3e081044e7 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 29 Aug 2024 15:29:52 +0200 Subject: [PATCH 136/152] new test for links --- scripts/HPC_chatbot_preprocessor/README.md | 6 +- .../chatbot_parser.py | 89 ++++++++++++++----- .../account/account_paragraph_1_metadata.json | 2 +- .../connecting_paragraph_15_metadata.json | 2 +- .../connecting_paragraph_3_metadata.json | 2 +- .../connecting_paragraph_8_metadata.json | 2 +- .../account_linux_paragraph_4.1_metadata.json | 2 +- .../account_linux_paragraph_7.1_metadata.json | 2 +- ...necting_linux_paragraph_13.1_metadata.json | 2 +- ...necting_linux_paragraph_13.6_metadata.json | 4 +- ...nnecting_linux_paragraph_5.1_metadata.json | 2 +- .../account_macos_paragraph_4.1_metadata.json | 2 +- .../account_macos_paragraph_7.1_metadata.json | 2 +- ...ccount_windows_paragraph_4.1_metadata.json | 2 +- ...ccount_windows_paragraph_4.2_metadata.json | 4 +- ...ccount_windows_paragraph_4.3_metadata.json | 2 +- ...ccount_windows_paragraph_6.2_metadata.json | 4 +- 
...ecting_windows_paragraph_4.1_metadata.json | 2 +- .../tests/test_links.py | 69 ++++++++++++++ 19 files changed, 158 insertions(+), 44 deletions(-) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_links.py diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 27c1bf3fea6..96a99498451 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -172,7 +172,7 @@ Any comments within the markdown files (for example TODO's) should follow the fo Comments can be written in such a way that the script will keep them as input for the bot. To do that, the marker `INPUT_FOR_BOT` should be put in front of the content of the comment as such. ``` - + ``` This will be reworked to @@ -190,3 +190,7 @@ Due to the nature of this script, it can generate large directories with very lo ### Markdown lists The parser is made in a way to detect lists and not split them in multiple paragraphs. The kinds of lists it can detect is all lists with denominators `-`, `+`, `*` and list indexed with numbers or letters (one letter per list entry). It can handle list entries being spread out over multiple lines if there is an indentation of at least two spaces. It can also handle multiple paragraph list entries in this way, as long as the indentation stays. + +### Links + +Part of the metadata of the parser are links. In order for the links to be built up in the right way, links to external sites should always start with either `https://` or `http://`. 
diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 3129ccaf566..9aa7dc972e5 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -104,7 +104,7 @@ METADATA_EXTENSION = "_metadata" # Marker for comments for the bot -INPUT_FOR_BOT = "INPUT_FOR_BOT" +INPUT_FOR_BOT = "INPUT_FOR_BOT: " # Standard strings for verbose output LINE = "------------------------------------------------------------------------------------------------------\n" @@ -138,7 +138,46 @@ def check_for_title(line, in_code_block, curr_dirs, options): return 0 -def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): +def make_valid_link(link, main_title, is_linux_tutorial): + """ + Function that converts a string to a valid link to be used in the metadata + + :param link: the input string to be turned into a valid link + :param main_title: the main title of the file that contains the link + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial + :return link: the valid link + """ + + # ugly fix for problem with links + linux_tutorial_files = ["beyond_the_basics", "common_pitfalls", "getting_started", "hpc_infrastructure", "index", "manipulating_files_and_directories", "navigating", "uploading_files"] + if is_linux_tutorial and any([linux_tutorial_files[i] in link for i in range(len(linux_tutorial_files))]): + linux_part = LINUX_TUTORIAL + '/' + else: + linux_part = "" + + if link.startswith('http://') or link.startswith('https://') or link.startswith('mailto:'): + pass + else: + if link.startswith("./"): + link = link.replace('./', '') + elif link.startswith("../"): + link = link.replace('../', '') + + if link.startswith("#"): + link = DOCS_URL + '/' + linux_part + main_title + "/" + link + elif link.endswith(".md") and ("/" not in link or "." 
not in link.split("/")[0]): + link = DOCS_URL + '/' + linux_part + link.replace(".md", "") + elif '.md#' in link: + link = DOCS_URL + '/' + linux_part + link.replace(".md", "/") + else: + link = DOCS_URL + '/' + linux_part + link + + link = link.replace('index/', '').replace('/index', '') + + return link + + +def replace_markdown_markers(curr_line, linklist, in_code_block, main_title, is_linux_tutorial): """ function that replaces certain markdown structures with the equivalent used on the website @@ -146,12 +185,13 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): :param linklist: the list used to store links that need to be printed at the end of the file :param in_code_block: boolean indicating whether the current line is part of a code block :param main_title: the main title of the file that is being processed + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial :return curr_line: the adapted current line :return linklist: the updated linklist """ # replace images with an empty line - if re.search(r'(?i)!\[image]\(.*?\)', curr_line) or re.search(r'!\[]\(img/.*?.png\)', curr_line): + if re.search(r'(?i)!\[image]\(.*?\)', curr_line) or re.search(r'!\[.*?]\(img/.*?\.png\)', curr_line): curr_line = "" # replace links with a reference @@ -159,13 +199,8 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): if matches: for match in matches: curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_MARKER + str(len(linklist)) + LINK_MARKER) - if ".md" not in match[1]: - if "#" not in match[1]: - linklist.append(match[1]) - else: - linklist.append(DOCS_URL + "/" + main_title.replace(".md", "") + "/" + match[1]) - else: - linklist.append(DOCS_URL + "/" + match[1].replace(".md", "/").replace("index", "").rstrip("/")) + + linklist.append(make_valid_link(match[1], main_title, is_linux_tutorial)) # codeblock (with ``` -> always stands on a separate line, 
so line can be dropped) if '```' in curr_line: @@ -238,13 +273,14 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): return curr_line, linklist -def split_text(file, main_title, options, current_paragraph_number=-1, OS=GENERIC): +def split_text(file, main_title, options, is_linux_tutorial, current_paragraph_number=-1, OS=GENERIC): """ Function that splits the text into smaller sections and makes them into two dictionaries containing text and metadata :param file: the filepath of the file to be split :param main_title: the main title of the file :param options: dictionary containing the options given by the user + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph on paragraph level :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph on paragraph level :return paragraphs_text: dictionary containing the split sections of text @@ -253,18 +289,19 @@ def split_text(file, main_title, options, current_paragraph_number=-1, OS=GENERI """ if options[SPLIT_ON_TITLES]: - return split_on_titles(file, main_title, options) + return split_on_titles(file, main_title, options, is_linux_tutorial) elif options[SPLIT_ON_PARAGRAPHS]: - return split_on_paragraphs(file, main_title, options, current_paragraph_number, OS) + return split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_paragraph_number, OS) -def split_on_titles(file, main_title, options): +def split_on_titles(file, main_title, options, is_linux_tutorial): """ Function that splits the text into smaller sections based on the subtitle structure and makes them into two dictionaries containing text and metadata :param file: the filepath of the file to be split :param main_title: the main title of the file :param options: dictionary containing the options 
given by the user + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial :return paragraphs_text: dictionary containing the split sections of text :return paragraphs_metadata: dictionary containing the metadata of each split section of text :return subtitle_order: list containing all encountered subtitles in order of appearance @@ -356,7 +393,7 @@ def split_on_titles(file, main_title, options): # line is not a title elif after_first_title: - line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) if line != "\n": current_paragraph += line @@ -366,7 +403,7 @@ def split_on_titles(file, main_title, options): last_dir = curr_dirs[last_title_level] else: previous_contained_if = True - line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) if line != "\n": current_paragraph += line @@ -384,13 +421,14 @@ def split_on_titles(file, main_title, options): return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order -def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, OS=GENERIC): +def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_paragraph_number=-1, OS=GENERIC): """ Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata :param file: the filepath of the file to be split :param main_title: the main title of the file :param options: dictionary containing the options given by the user + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial :param current_paragraph_number: number of the paragraph that is being split, only applicable when 
splitting an os-specific paragraph :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph :return paragraphs_text: dictionary containing the split sections of text @@ -524,12 +562,12 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # make a new title metadata_title = make_valid_title(line[title_level + 1:-1]) - line, link_list = replace_markdown_markers(line[title_level + 1:], link_list, in_code_block, main_title) + line, link_list = replace_markdown_markers(line[title_level + 1:], link_list, in_code_block, main_title, is_linux_tutorial) current_paragraph += line # line is not a title or the beginning of a new paragraph elif line != "\n" or previous_contained_if: - line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) current_paragraph += line # keep track of title level and directory to write to metadata upon discovering a new subtitle @@ -538,7 +576,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, last_dir = curr_dirs[last_title_level] else: previous_contained_if = True - line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) current_paragraph += line # create a title for the last paragraph @@ -799,7 +837,7 @@ def make_valid_title(title): valid_filename = re.sub(invalid_chars, '', title) # Strip leading/trailing whitespace - valid_filename = valid_filename.strip().strip('-').replace(' ', '-') + valid_filename = valid_filename.strip().strip('-').replace(' ', '-').replace("--", "-") return valid_filename @@ -889,7 +927,10 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe os_part = "" else: os_part = LINK_OS[OS] + "/" - metadata[REFERENCE_LINK] 
= DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in paragraphs_metadata[title][SUBTITLE] if char.isalnum() or char == '-').strip('-') + if "index" not in paragraphs_metadata[title][MAIN_TITLE]: + metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in paragraphs_metadata[title][SUBTITLE] if char.isalnum() or char == '-').strip('-') + else: + metadata[REFERENCE_LINK] = DOCS_URL # write metadata to file with open(os.path.join(filepath, file_title + METADATA_EXTENSION + ".json"), 'w') as writefile: @@ -964,7 +1005,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or writefile.write(jinja_text) # split in right way - _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) + _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, is_linux_tutorial, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) # prepare variables to fix metadata total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:] @@ -1110,7 +1151,7 @@ def main(options): print("\nSplitting the file for the first time (split in sufficiently small generic sections and large os-specific chunks)") # split the text in paragraphs - paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options) + paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options, is_linux_tutorial) if options[VERBOSE]: print("\nFurther splitting os-specific chunks and writing generic and os-specific sections to files 
with metadata") diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json index cdba091d7df..738d24cb42e 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json @@ -5,7 +5,7 @@ "title_depth": 2, "directory": "account", "links": { - "0": "../sites/hpc_policies" + "0": "https://docs.hpc.ugent.be/sites/hpc_policies" }, "parent_title": "", "previous_title": null, diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json index ff9c22397d1..74ea0125d71 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json @@ -5,7 +5,7 @@ "title_depth": 2, "directory": "connecting", "links": { - "0": "https://docs.hpc.ugent.be/connecting/../linux-tutorial/uploading_files/#copying-faster-with-rsync" + "0": "https://docs.hpc.ugent.be/linux-tutorial/uploading_files/#copying-faster-with-rsync" }, "parent_title": "", "previous_title": "connecting_paragraph_14", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json index e30467d0799..8d6b1696e08 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json @@ -6,7 +6,7 @@ "directory": "connecting", "links": { "0": 
"https://docs.hpc.ugent.be/web_portal", - "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node" + "1": "https://docs.hpc.ugent.be/troubleshooting/#issues-connecting-to-login-node" }, "parent_title": "", "previous_title": "connecting_paragraph_2", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json index 074e7e891ce..38f265cfdcd 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json @@ -5,7 +5,7 @@ "title_depth": 2, "directory": "connecting", "links": { - "0": "../useful_linux_commands" + "0": "https://docs.hpc.ugent.be/useful_linux_commands" }, "parent_title": "", "previous_title": "connecting_paragraph_7", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json index bcc0552177d..bc51f39d286 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json @@ -6,7 +6,7 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "links": { - "0": "../../linux-tutorial" + "0": "https://docs.hpc.ugent.be/linux-tutorial" }, "previous_title": "account_paragraph_3", "next_title": "account_paragraph_5", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json index 4214d6cb321..2b3633d71e7 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json @@ -6,7 +6,7 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "links": { - "0": "../connecting" + "0": "https://docs.hpc.ugent.be/connecting" }, "previous_title": "account_paragraph_6", "next_title": "account_linux_paragraph_7.2", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json index 988c10028d8..6b70790e1e3 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json @@ -6,7 +6,7 @@ "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", "links": { - "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" + "0": "https://docs.hpc.ugent.be/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" }, "previous_title": "connecting_paragraph_12", "next_title": "connecting_linux_paragraph_13.2", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json index 1fc868ffab2..c7fe6bf6a44 100644 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json @@ -6,8 +6,8 @@ "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", "links": { - "0": "", - "1": "" + "0": "https://docs.hpc.ugent.be/", + "1": "https://docs.hpc.ugent.be/" }, "previous_title": "connecting_linux_paragraph_13.5", "next_title": "connecting_linux_paragraph_13.7", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json index 55613bca732..66c5dc4aeff 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json @@ -6,7 +6,7 @@ "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", "links": { - "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host" + "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host" }, "previous_title": "connecting_paragraph_4", "next_title": "connecting_linux_paragraph_5.2", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json index 5400014a85c..e3813cb647e 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json @@ -6,7 +6,7 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "links": { - "0": "../../linux-tutorial" + "0": "https://docs.hpc.ugent.be/linux-tutorial" }, "previous_title": "account_paragraph_3", "next_title": "account_paragraph_5", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json index f9b6c751fd4..18b3b3675de 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json @@ -6,7 +6,7 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "links": { - "0": "../connecting" + "0": "https://docs.hpc.ugent.be/connecting" }, "previous_title": "account_paragraph_6", "next_title": "account_macos_paragraph_7.2", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json index 7dc9b50fbdd..dc5a8cb22b9 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json @@ -6,7 +6,7 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "links": { - "0": "../../linux-tutorial" + "0": "https://docs.hpc.ugent.be/linux-tutorial" }, "previous_title": "account_paragraph_3", "next_title": "account_windows_paragraph_4.2", diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json index 773acaabf23..534ebda0a1c 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json @@ -6,8 +6,8 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "links": { - "0": "https://docs.hpc.ugent.be/account/../connecting/#open-a-terminal", - "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair" + "0": "https://docs.hpc.ugent.be/connecting/#open-a-terminal", + "1": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair" }, "previous_title": "account_windows_paragraph_4.1", "next_title": "account_windows_paragraph_4.3", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json index d803aeadb25..4555638639d 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json @@ -6,7 +6,7 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "links": { - "0": "" + "0": "https://docs.hpc.ugent.be/" }, "previous_title": "account_windows_paragraph_4.2", "next_title": "account_windows_paragraph_4.4", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json index 46808447a10..11c69338029 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json @@ -6,8 +6,8 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "links": { - "0": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair", - "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair" + "0": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair", + "1": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair" }, "previous_title": "account_windows_paragraph_6.1", "next_title": "account_windows_paragraph_6.3", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json index 8b6b6f698d1..d3b7d581c94 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json @@ -6,7 +6,7 @@ "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", "links": { - "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host" + "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host" }, "previous_title": "connecting_paragraph_3", "next_title": "connecting_paragraph_5", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_links.py 
b/scripts/HPC_chatbot_preprocessor/tests/test_links.py new file mode 100644 index 00000000000..d1acca1d740 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_links.py @@ -0,0 +1,69 @@ +import os +import pytest +from urllib import request +from chatbot_parser import main +import json + +whitelist = ["mailto:hpc@ugent.be"] +slow_list = ["https://login.hpc.ugent.be", "https://www.edx.org/course/introduction-linux-linuxfoundationx-lfs101x-0"] + +options_general = {"SOURCE_DIRECTORY": "../../mkdocs/docs/HPC", + "DESTINATION_DIRECTORY": ".", + "SPLIT_ON_TITLES": False, + "SPLIT_ON_PARAGRAPHS": True, + "MIN_PARAGRAPH_LENGTH": 683, + "MAX_TITLE_DEPTH": 4, + "INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": False, + "VERBOSE": False} +options_os_specific = {"SOURCE_DIRECTORY": "../../mkdocs/docs/HPC/linux-tutorial", + "DESTINATION_DIRECTORY": "./linux-tutorial", + "SPLIT_ON_TITLES": False, + "SPLIT_ON_PARAGRAPHS": True, + "MIN_PARAGRAPH_LENGTH": 683, + "MAX_TITLE_DEPTH": 4, + "INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": False, + "VERBOSE": False} + + +@pytest.mark.parametrize("options", [options_general, options_os_specific]) +def test_all_links(options): + all_links = {} + main(options) + broken_links = {} + empty_links = {} + + for (dirpath, dirnames, filenames) in os.walk(os.path.join(options['DESTINATION_DIRECTORY'], 'parsed_mds')): + for filename in filenames: + all_links[filename] = [] + if filename.endswith('metadata.json'): + data = json.load(open(os.path.join(dirpath, filename))) + if 'links' in data.keys(): + for key in data['links'].keys(): + all_links[filename].append(data['links'][key]) + all_links[filename].append(data['reference_link'].split("#")[0]) + + for filename in all_links.keys(): + all_links[filename] = list(set(all_links[filename])) + for link in all_links[filename]: + if len(link) != 0: + try: + if link not in whitelist and link not in slow_list: + with request.urlopen(link) as res: + if res.status == 200: + pass 
+ except: + print("Broken link in " + filename + ": " + link) + if filename in broken_links.keys(): + broken_links[filename].append(link) + else: + broken_links[filename] = [link] + else: + print("Empty link in " + filename) + if filename in empty_links.keys(): + empty_links[filename].append(link) + else: + empty_links[filename] = [link] + assert len(empty_links.keys()) == 0 + assert len(broken_links.keys()) == 0 From b6b861044b0b12f06ba9b59ac7406feef07761e8 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 29 Aug 2024 16:44:15 +0200 Subject: [PATCH 137/152] new test to make sure lists are kept as one section --- .../chatbot_parser.py | 2 ++ .../generic/account/account_paragraph_1.txt | 2 ++ .../account/account_paragraph_1_metadata.json | 3 ++- .../generic/account/account_paragraph_2.txt | 7 +++-- .../account/account_paragraph_2_metadata.json | 5 ++-- .../generic/account/account_paragraph_3.txt | 8 +++--- .../account/account_paragraph_3_metadata.json | 3 +++ .../account/account_linux_paragraph_4.1.txt | 4 --- .../account_linux_paragraph_4.1_metadata.json | 15 ----------- .../account/account_macos_paragraph_4.1.txt | 4 --- .../account_macos_paragraph_4.1_metadata.json | 15 ----------- .../account/account_macos_paragraph_5.1.txt | 3 +++ .../account/account_macos_paragraph_5.2.txt | 7 ++--- .../account/account_macos_paragraph_5.3.txt | 4 --- ...necting_macos_paragraph_13.1_metadata.json | 2 +- ...necting_macos_paragraph_13.6_metadata.json | 4 +-- ...nnecting_macos_paragraph_5.1_metadata.json | 2 +- .../account/account_windows_paragraph_4.1.txt | 9 ++++--- ...ccount_windows_paragraph_4.1_metadata.json | 3 --- .../account/account_windows_paragraph_4.2.txt | 13 +++++----- .../account/account_windows_paragraph_4.3.txt | 7 ----- .../tests/test_files/list_file/list_test.md | 15 +++++++++++ .../tests/test_lists.py | 26 +++++++++++++++++++ 23 files changed, 84 insertions(+), 79 deletions(-) delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_lists.py diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 9aa7dc972e5..f5e5b452ff5 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -503,6 +503,8 @@ def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_pa pass elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list: # line(s) between list entries pass + elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', nxt): + in_list = True elif in_list: if options[VERBOSE]: print("List ended, starting new paragraphs again") diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt index 1b79fd22391..c3f86ade180 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt @@ -11,3 +11,5 @@ Brussels University Association, Antwerp University Association and the University Colleges-Limburg. The VSC is funded by the Flemish Government. There are two methods for connecting to HPC-UGent infrastructure: +- Using a terminal to connect via SSH. 
+- Using the web portal diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json index 738d24cb42e..52a3ef55568 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json @@ -5,7 +5,8 @@ "title_depth": 2, "directory": "account", "links": { - "0": "https://docs.hpc.ugent.be/sites/hpc_policies" + "0": "https://docs.hpc.ugent.be/sites/hpc_policies", + "1": "https://docs.hpc.ugent.be/web_portal" }, "parent_title": "", "previous_title": null, diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt index 6ecd65e2184..9614ed1447c 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt @@ -1,6 +1,9 @@ -- Using a terminal to connect via SSH. -- Using the web portal The web portal offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). If you would like use a terminal with SSH as this gives you more flexibility continue reading. However if you prefer to use the web portal, you can skip ahead to the following section: Applying for the account. Once you have successfully obtained an account, you can then delve into the details of utilizing the HPC-UGent web portal by reading Using the HPC-UGent web portal. +The HPC-UGent infrastructure clusters use public/private key pairs for user authentication +(rather than passwords). 
Technically, the private key is stored on your +local computer and always stays there; the public key is stored on the HPC. +Access to the HPC is granted to anyone who can prove to have access to the +corresponding private key on his local computer. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json index 0b22e2986a0..a41a1993674 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json @@ -5,9 +5,8 @@ "title_depth": 2, "directory": "account", "links": { - "0": "https://docs.hpc.ugent.be/web_portal", - "1": "https://docs.hpc.ugent.be/account/#applying-for-the-account", - "2": "https://docs.hpc.ugent.be/web_portal" + "0": "https://docs.hpc.ugent.be/account/#applying-for-the-account", + "1": "https://docs.hpc.ugent.be/web_portal" }, "parent_title": "", "previous_title": "account_paragraph_1", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt index e4946869273..963b35c090b 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt @@ -1,8 +1,3 @@ -The HPC-UGent infrastructure clusters use public/private key pairs for user authentication -(rather than passwords). Technically, the private key is stored on your -local computer and always stays there; the public key is stored on the HPC. -Access to the HPC is granted to anyone who can prove to have access to the -corresponding private key on his local computer. How do SSH keys work? 
- an SSH public/private key pair can be seen as a lock and a key - the SSH public key is equivalent with a lock: you give it to the @@ -15,3 +10,6 @@ How do SSH keys work? locks (SSH public keys) attached to it, and you only need to open one lock with the corresponding key (SSH private key) to open the door (log in to the account). +Since all VSC clusters use Linux as their main operating system, you +will need to get acquainted with using the command-line interface and +using the terminal (see tutorial). diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json index bd2f73195a6..4df622cc4aa 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json @@ -4,6 +4,9 @@ "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", + "links": { + "0": "https://docs.hpc.ugent.be/linux-tutorial" + }, "parent_title": "", "previous_title": "account_paragraph_2", "next_title": "account_paragraph_4", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt deleted file mode 100644 index 3a282a73a15..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt +++ /dev/null @@ -1,4 +0,0 @@ -How do SSH keys work -Since all VSC clusters use Linux as their main operating system, you -will need to get acquainted with using the command-line interface and -using the terminal (see tutorial). 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json deleted file mode 100644 index bc51f39d286..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "How-do-SSH-keys-work", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/linux-tutorial" - }, - "previous_title": "account_paragraph_3", - "next_title": "account_paragraph_5", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#how-do-ssh-keys-work" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt deleted file mode 100644 index 3a282a73a15..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt +++ /dev/null @@ -1,4 +0,0 @@ -How do SSH keys work -Since all VSC clusters use Linux as their main operating system, you -will need to get acquainted with using the command-line interface and -using the terminal (see tutorial). 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json deleted file mode 100644 index e3813cb647e..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "How-do-SSH-keys-work", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/linux-tutorial" - }, - "previous_title": "account_paragraph_3", - "next_title": "account_paragraph_5", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#how-do-ssh-keys-work" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt index f3483fcaef1..d96c80b42a2 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt @@ -10,3 +10,6 @@ other secure network services between two networked computers. In short, ssh provides a secure connection between 2 computers via insecure channels (Network, Internet, telephone lines, ...). "Secure" means that: +1. the User is authenticated to the System; and +2. the System is authenticated to the User; and +3. all data is encrypted during transfer. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt index 5189a953002..318f913fba3 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt @@ -1,6 +1,3 @@ -1. the User is authenticated to the System; and -2. the System is authenticated to the User; and -3. all data is encrypted during transfer. OpenSSH is a FREE implementation of the SSH connectivity protocol. comes with its own implementation of OpenSSH, so you don't need to install any third-party software to use it. Just open a terminal window and jump in! @@ -11,3 +8,7 @@ $ ssh -V OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017 To access the clusters and transfer your files, you will use the following commands: +1. ssh-keygen: to generate the SSH key pair (public + private key); +2. ssh: to open a shell on a remote machine; +3. sftp: a secure equivalent of ftp; +4. scp: a secure equivalent of the remote copy command rcp. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt index a8c087f818b..5df90a3dd7c 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt @@ -1,7 +1,3 @@ -1. ssh-keygen: to generate the SSH key pair (public + private key); -2. ssh: to open a shell on a remote machine; -3. sftp: a secure equivalent of ftp; -4. scp: a secure equivalent of the remote copy command rcp. 
Generate a public/private key pair with OpenSSH A key pair might already be present in the default location inside your home directory. Therefore, we first check if a key is available with the diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json index 9ec843ff0aa..79157005600 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json @@ -6,7 +6,7 @@ "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", "links": { - "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" + "0": "https://docs.hpc.ugent.be/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" }, "previous_title": "connecting_paragraph_12", "next_title": "connecting_macos_paragraph_13.2", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json index fe899ad9dbc..9b08fbde549 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json @@ -6,8 +6,8 @@ "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", "links": { - "0": "", - "1": "" + "0": "https://docs.hpc.ugent.be/", + "1": "https://docs.hpc.ugent.be/" }, "previous_title": "connecting_macos_paragraph_13.5", "next_title": 
"connecting_macos_paragraph_13.7", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json index 85b088b0e8c..f928fbfcdd6 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json @@ -6,7 +6,7 @@ "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", "links": { - "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host" + "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host" }, "previous_title": "connecting_paragraph_4", "next_title": "connecting_macos_paragraph_5.2", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt index 1e70493305f..93ca7ac9da5 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt @@ -1,7 +1,4 @@ How do SSH keys work -Since all VSC clusters use Linux as their main operating system, you -will need to get acquainted with using the command-line interface and -using the terminal (see tutorial). A typical Windows environment does not come with pre-installed software to connect and run command-line executables on a HPC. Some tools need to be installed on your Windows machine first, before we can start the actual @@ -13,3 +10,9 @@ PuTTYgen executable and run it. 
This can be useful in situations where you do not have the required permissions to install software on the computer you are using. Alternatively, an installation package is also available. +You can download PuTTY from the official address: +. You +probably want the 64-bits version. If you can install software on your +computer, you can use the "Package files", if not, you can download and +use putty.exe and puttygen.exe in the "Alternative binary files" +section. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json index dc5a8cb22b9..e0024f40d55 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json @@ -5,9 +5,6 @@ "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/linux-tutorial" - }, "previous_title": "account_paragraph_3", "next_title": "account_windows_paragraph_4.2", "OS": "windows", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt index 1a30a219fec..cebd1da3baf 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt @@ -1,12 +1,13 @@ -You can download PuTTY from the official address: -. You -probably want the 64-bits version. 
If you can install software on your -computer, you can use the "Package files", if not, you can download and -use putty.exe and puttygen.exe in the "Alternative binary files" -section. The PuTTY package consists of several components, but we'll only use two: 1. PuTTY: the Telnet and SSH client itself (to login, see Open a terminal) 2. PuTTYgen: an RSA and DSA key generation utility (to generate a key pair, see Generate a public/private key pair) Generating a public/private key pair +Before requesting a VSC account, you need to generate a pair of ssh +keys. You need 2 keys, a public and a private key. You can visualise the +public key as a lock to which only you have the key (your private key). +You can send a copy of your lock to anyone without any problems, because +only you can open it, as long as you keep your private key secure. To +generate a public/private key pair, you can use the PuTTYgen key +generator. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt index b082d381a64..6e65300562d 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt @@ -1,10 +1,3 @@ -Before requesting a VSC account, you need to generate a pair of ssh -keys. You need 2 keys, a public and a private key. You can visualise the -public key as a lock to which only you have the key (your private key). -You can send a copy of your lock to anyone without any problems, because -only you can open it, as long as you keep your private key secure. To -generate a public/private key pair, you can use the PuTTYgen key -generator. Start PuTTYgen.exe it and follow these steps: 1. 
In "Parameters" (at the bottom of the window), choose "RSA" and set the number of bits in the key to 4096. diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md new file mode 100644 index 00000000000..1d1d3c210e8 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md @@ -0,0 +1,15 @@ +# Title + +Some explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list. + +1. First entry + +2. Second entry + +3. Third entry + + ![image](img/an_image_for_the_third_entry.png) + +4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit. + +And now the text continues like normal in a new section. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py new file mode 100644 index 00000000000..4975856a75f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py @@ -0,0 +1,26 @@ +import pytest +from chatbot_parser import split_on_paragraphs + + +@pytest.mark.parametrize("file, main_title, options, is_linux_tutorial, expected_text", [ + ("./test_files/list_file/list_test.md", + "list_test.md", + { + "SOURCE_DIRECTORY": "./test_files/list_file", + "DESTINATION_DIRECTORY": "./test_files/list_file", + "SPLIT_ON_TITLES": False, + "SPLIT_ON_PARAGRAPHS": True, + "MIN_PARAGRAPH_LENGTH": 100, + "MAX_TITLE_DEPTH": 4, + "INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": False, + "VERBOSE": False + }, + False, + { + 'list_test.md_paragraph_1': "Title\nSome explanation about the following list that is quite long. 
This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.\n1. First entry\n2. Second entry\n3. Third entry\n4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.\n", + 'list_test.md_paragraph_2': 'And now the text continues like normal in a new section.'} + ) +]) +def test_links(file, main_title, options, is_linux_tutorial, expected_text): + assert split_on_paragraphs(file, main_title, options, is_linux_tutorial)[1] == expected_text From 57a21397a869cbcffb6fab5f4d14496043b9b174 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 29 Aug 2024 16:49:24 +0200 Subject: [PATCH 138/152] updated test_file for list test --- .../tests/test_files/list_file/list_test.md | 2 +- .../tests/test_lists.py | 23 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md index 1d1d3c210e8..1e18a1495d5 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md @@ -2,7 +2,7 @@ Some explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list. -1. First entry +1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list. 2. 
Second entry diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py index 4975856a75f..06e56a5cb2c 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py @@ -6,20 +6,21 @@ ("./test_files/list_file/list_test.md", "list_test.md", { - "SOURCE_DIRECTORY": "./test_files/list_file", - "DESTINATION_DIRECTORY": "./test_files/list_file", - "SPLIT_ON_TITLES": False, - "SPLIT_ON_PARAGRAPHS": True, - "MIN_PARAGRAPH_LENGTH": 100, - "MAX_TITLE_DEPTH": 4, - "INCLUDE_LINKS_IN_PLAINTEXT": False, - "DEEP_DIRECTORIES": False, - "VERBOSE": False + "SOURCE_DIRECTORY": "./test_files/list_file", + "DESTINATION_DIRECTORY": "./test_files/list_file", + "SPLIT_ON_TITLES": False, + "SPLIT_ON_PARAGRAPHS": True, + "MIN_PARAGRAPH_LENGTH": 100, + "MAX_TITLE_DEPTH": 4, + "INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": False, + "VERBOSE": False }, False, { - 'list_test.md_paragraph_1': "Title\nSome explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.\n1. First entry\n2. Second entry\n3. Third entry\n4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.\n", - 'list_test.md_paragraph_2': 'And now the text continues like normal in a new section.'} + 'list_test.md_paragraph_1': "Title\nSome explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.\n1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. 
If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list.\n2. Second entry\n3. Third entry\n4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.\n", + 'list_test.md_paragraph_2': 'And now the text continues like normal in a new section.' + } ) ]) def test_links(file, main_title, options, is_linux_tutorial, expected_text): From 170a10cb9eaa0d92482daef766dd2b0918e9a4cd Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 09:53:12 +0200 Subject: [PATCH 139/152] dropped <> around links and started new function to calculate length of paragraphs --- .../HPC_chatbot_preprocessor/chatbot_parser.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index f5e5b452ff5..cff487f8589 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -235,6 +235,10 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title, is_ elif re.fullmatch(r'!--.*?--', content): curr_line = re.sub(r'<.*?>', "", curr_line) + # drop the <> around links + elif re.match(r'http://', content) or re.match(r'https://', content): + curr_line = re.sub(r'<' + content + '>', content, curr_line ) + # keep the rest else: pass @@ -527,7 +531,7 @@ def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_pa title_level = check_for_title(line, in_code_block, curr_dirs, options) # check whether a new paragraph should be started - if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= options[MIN_PARAGRAPH_LENGTH] and not in_code_block 
and not in_list: + if line == "\n" and paragraph_long_enough(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph), options) and not in_code_block and not in_list: # create a title for the previous paragraph if current_paragraph_number == -1: @@ -602,6 +606,18 @@ def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_pa return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order +def paragraph_long_enough(paragraph, options): + """ + Function that checks if the paragraph is long enough to be split of + + :param paragraph: current paragraph + :param options: dictionary containing the options given by the user + :return: + """ + # TODO: change this into something that uses the tokenizer + return len(paragraph) >= options[MIN_PARAGRAPH_LENGTH] + + def write_metadata(main_title, subtitle, links, title_level, directory, source_file): """ Function that writes metadata about a text section to a dictionary From 04efff6ca40a3b19f694e8168d83a77d45a1078b Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 10:10:49 +0200 Subject: [PATCH 140/152] removed parsed mds --- .../generic/account/account_paragraph_1.txt | 15 ------- .../generic/account/account_paragraph_10.txt | 18 -------- .../account_paragraph_10_metadata.json | 12 ------ .../generic/account/account_paragraph_12.txt | 14 ------- .../account_paragraph_12_metadata.json | 12 ------ .../account/account_paragraph_1_metadata.json | 16 ------- .../generic/account/account_paragraph_2.txt | 9 ---- .../account/account_paragraph_2_metadata.json | 16 ------- .../generic/account/account_paragraph_3.txt | 15 ------- .../account/account_paragraph_3_metadata.json | 15 ------- .../generic/account/account_paragraph_8.txt | 14 ------- .../account/account_paragraph_8_metadata.json | 12 ------ .../compiling_your_software_paragraph_1.txt | 10 ----- .../compiling_your_software_paragraph_10.txt | 19 --------- 
...g_your_software_paragraph_10_metadata.json | 11 ----- .../compiling_your_software_paragraph_11.txt | 20 --------- ...g_your_software_paragraph_11_metadata.json | 11 ----- .../compiling_your_software_paragraph_12.txt | 9 ---- ...g_your_software_paragraph_12_metadata.json | 11 ----- ...ng_your_software_paragraph_1_metadata.json | 11 ----- .../compiling_your_software_paragraph_2.txt | 13 ------ ...ng_your_software_paragraph_2_metadata.json | 11 ----- .../compiling_your_software_paragraph_3.txt | 13 ------ ...ng_your_software_paragraph_3_metadata.json | 11 ----- .../compiling_your_software_paragraph_4.txt | 15 ------- ...ng_your_software_paragraph_4_metadata.json | 11 ----- .../compiling_your_software_paragraph_5.txt | 16 ------- ...ng_your_software_paragraph_5_metadata.json | 11 ----- .../compiling_your_software_paragraph_6.txt | 30 ------------- ...ng_your_software_paragraph_6_metadata.json | 14 ------- .../compiling_your_software_paragraph_7.txt | 15 ------- ...ng_your_software_paragraph_7_metadata.json | 11 ----- .../compiling_your_software_paragraph_8.txt | 19 --------- ...ng_your_software_paragraph_8_metadata.json | 11 ----- .../compiling_your_software_paragraph_9.txt | 32 -------------- ...ng_your_software_paragraph_9_metadata.json | 11 ----- .../connecting/connecting_paragraph_1.txt | 18 -------- .../connecting/connecting_paragraph_10.txt | 24 ----------- .../connecting_paragraph_10_metadata.json | 12 ------ .../connecting/connecting_paragraph_15.txt | 7 ---- .../connecting_paragraph_15_metadata.json | 15 ------- .../connecting/connecting_paragraph_16.txt | 11 ----- .../connecting_paragraph_16_metadata.json | 16 ------- .../connecting_paragraph_1_metadata.json | 15 ------- .../connecting/connecting_paragraph_2.txt | 18 -------- .../connecting_paragraph_2_metadata.json | 12 ------ .../connecting/connecting_paragraph_3.txt | 9 ---- .../connecting_paragraph_3_metadata.json | 16 ------- .../connecting/connecting_paragraph_6.txt | 14 ------- 
.../connecting_paragraph_6_metadata.json | 12 ------ .../connecting/connecting_paragraph_7.txt | 21 ---------- .../connecting_paragraph_7_metadata.json | 12 ------ .../connecting/connecting_paragraph_8.txt | 12 ------ .../connecting_paragraph_8_metadata.json | 15 ------- .../connecting/connecting_paragraph_9.txt | 19 --------- .../connecting_paragraph_9_metadata.json | 12 ------ .../account/account_linux_paragraph_11.1.txt | 17 -------- ...account_linux_paragraph_11.1_metadata.json | 15 ------- .../account/account_linux_paragraph_5.1.txt | 14 ------- .../account_linux_paragraph_5.1_metadata.json | 12 ------ .../account/account_linux_paragraph_5.2.txt | 14 ------- .../account_linux_paragraph_5.2_metadata.json | 12 ------ .../account/account_linux_paragraph_5.3.txt | 16 ------- .../account_linux_paragraph_5.3_metadata.json | 12 ------ .../account/account_linux_paragraph_5.4.txt | 13 ------ .../account_linux_paragraph_5.4_metadata.json | 12 ------ .../account/account_linux_paragraph_5.5.txt | 6 --- .../account_linux_paragraph_5.5_metadata.json | 12 ------ .../account/account_linux_paragraph_6.1.txt | 1 - .../account_linux_paragraph_6.1_metadata.json | 12 ------ .../account/account_linux_paragraph_7.1.txt | 14 ------- .../account_linux_paragraph_7.1_metadata.json | 15 ------- .../account/account_linux_paragraph_7.2.txt | 8 ---- .../account_linux_paragraph_7.2_metadata.json | 12 ------ .../account/account_linux_paragraph_9.1.txt | 7 ---- .../account_linux_paragraph_9.1_metadata.json | 12 ------ .../connecting_linux_paragraph_11.1.txt | 37 ---------------- ...necting_linux_paragraph_11.1_metadata.json | 12 ------ .../connecting_linux_paragraph_12.1.txt | 6 --- ...necting_linux_paragraph_12.1_metadata.json | 12 ------ .../connecting_linux_paragraph_13.1.txt | 12 ------ ...necting_linux_paragraph_13.1_metadata.json | 15 ------- .../connecting_linux_paragraph_13.2.txt | 17 -------- ...necting_linux_paragraph_13.2_metadata.json | 12 ------ 
.../connecting_linux_paragraph_13.3.txt | 22 ---------- ...necting_linux_paragraph_13.3_metadata.json | 12 ------ .../connecting_linux_paragraph_13.4.txt | 14 ------- ...necting_linux_paragraph_13.4_metadata.json | 12 ------ .../connecting_linux_paragraph_13.5.txt | 14 ------- ...necting_linux_paragraph_13.5_metadata.json | 12 ------ .../connecting_linux_paragraph_13.6.txt | 18 -------- ...necting_linux_paragraph_13.6_metadata.json | 16 ------- .../connecting_linux_paragraph_14.1.txt | 10 ----- ...necting_linux_paragraph_14.1_metadata.json | 12 ------ .../connecting_linux_paragraph_5.1.txt | 12 ------ ...nnecting_linux_paragraph_5.1_metadata.json | 15 ------- .../connecting_linux_paragraph_5.2.txt | 4 -- ...nnecting_linux_paragraph_5.2_metadata.json | 12 ------ .../account/account_macos_paragraph_11.1.txt | 17 -------- ...account_macos_paragraph_11.1_metadata.json | 15 ------- .../account/account_macos_paragraph_5.1.txt | 15 ------- .../account_macos_paragraph_5.1_metadata.json | 12 ------ .../account/account_macos_paragraph_5.2.txt | 14 ------- .../account_macos_paragraph_5.2_metadata.json | 12 ------ .../account/account_macos_paragraph_5.3.txt | 16 ------- .../account_macos_paragraph_5.3_metadata.json | 12 ------ .../account/account_macos_paragraph_5.4.txt | 13 ------ .../account_macos_paragraph_5.4_metadata.json | 12 ------ .../account/account_macos_paragraph_5.5.txt | 6 --- .../account_macos_paragraph_5.5_metadata.json | 12 ------ .../account/account_macos_paragraph_6.1.txt | 1 - .../account_macos_paragraph_6.1_metadata.json | 12 ------ .../account/account_macos_paragraph_7.1.txt | 14 ------- .../account_macos_paragraph_7.1_metadata.json | 15 ------- .../account/account_macos_paragraph_7.2.txt | 7 ---- .../account_macos_paragraph_7.2_metadata.json | 12 ------ .../account/account_macos_paragraph_9.1.txt | 12 ------ .../account_macos_paragraph_9.1_metadata.json | 12 ------ .../connecting_macos_paragraph_11.1.txt | 37 ---------------- 
...necting_macos_paragraph_11.1_metadata.json | 12 ------ .../connecting_macos_paragraph_12.1.txt | 6 --- ...necting_macos_paragraph_12.1_metadata.json | 12 ------ .../connecting_macos_paragraph_13.1.txt | 12 ------ ...necting_macos_paragraph_13.1_metadata.json | 15 ------- .../connecting_macos_paragraph_13.2.txt | 17 -------- ...necting_macos_paragraph_13.2_metadata.json | 12 ------ .../connecting_macos_paragraph_13.3.txt | 22 ---------- ...necting_macos_paragraph_13.3_metadata.json | 12 ------ .../connecting_macos_paragraph_13.4.txt | 14 ------- ...necting_macos_paragraph_13.4_metadata.json | 12 ------ .../connecting_macos_paragraph_13.5.txt | 14 ------- ...necting_macos_paragraph_13.5_metadata.json | 12 ------ .../connecting_macos_paragraph_13.6.txt | 18 -------- ...necting_macos_paragraph_13.6_metadata.json | 16 ------- .../connecting_macos_paragraph_14.1.txt | 15 ------- ...necting_macos_paragraph_14.1_metadata.json | 12 ------ .../connecting_macos_paragraph_14.2.txt | 3 -- ...necting_macos_paragraph_14.2_metadata.json | 12 ------ .../connecting_macos_paragraph_5.1.txt | 10 ----- ...nnecting_macos_paragraph_5.1_metadata.json | 15 ------- .../connecting_macos_paragraph_5.2.txt | 7 ---- ...nnecting_macos_paragraph_5.2_metadata.json | 12 ------ .../account_windows_paragraph_11.1.txt | 17 -------- ...count_windows_paragraph_11.1_metadata.json | 15 ------- .../account/account_windows_paragraph_4.1.txt | 18 -------- ...ccount_windows_paragraph_4.1_metadata.json | 12 ------ .../account/account_windows_paragraph_4.2.txt | 13 ------ ...ccount_windows_paragraph_4.2_metadata.json | 16 ------- .../account/account_windows_paragraph_4.3.txt | 30 ------------- ...ccount_windows_paragraph_4.3_metadata.json | 15 ------- .../account/account_windows_paragraph_4.4.txt | 2 - ...ccount_windows_paragraph_4.4_metadata.json | 12 ------ .../account/account_windows_paragraph_6.1.txt | 13 ------ ...ccount_windows_paragraph_6.1_metadata.json | 12 ------ 
.../account/account_windows_paragraph_6.2.txt | 11 ----- ...ccount_windows_paragraph_6.2_metadata.json | 16 ------- .../account/account_windows_paragraph_6.3.txt | 5 --- ...ccount_windows_paragraph_6.3_metadata.json | 12 ------ .../account/account_windows_paragraph_9.1.txt | 8 ---- ...ccount_windows_paragraph_9.1_metadata.json | 12 ------ .../connecting_windows_paragraph_11.1.txt | 9 ---- ...cting_windows_paragraph_11.1_metadata.json | 12 ------ .../connecting_windows_paragraph_12.1.txt | 22 ---------- ...cting_windows_paragraph_12.1_metadata.json | 12 ------ .../connecting_windows_paragraph_12.2.txt | 11 ----- ...cting_windows_paragraph_12.2_metadata.json | 12 ------ .../connecting_windows_paragraph_12.3.txt | 6 --- ...cting_windows_paragraph_12.3_metadata.json | 12 ------ .../connecting_windows_paragraph_4.1.txt | 42 ------------------- ...ecting_windows_paragraph_4.1_metadata.json | 15 ------- 170 files changed, 2310 deletions(-) delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json 
delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt delete mode 
100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt 
delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json delete 
mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt deleted file mode 100644 index c3f86ade180..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt +++ /dev/null @@ -1,15 +0,0 @@ -Getting an HPC Account -Getting ready to request an account -All users of AUGent can request -an -account on the HPC, which is part of the Flemish Supercomputing Centre (VSC). -See HPC policies for more information on who is entitled to an account. -The VSC, abbreviation of Flemish Supercomputer Centre, is a virtual -supercomputer centre. It is a partnership between the five Flemish -associations: the Association KUĀ Leuven, Ghent University Association, -Brussels University Association, Antwerp University Association and the -University Colleges-Limburg. 
The VSC is funded by the Flemish -Government. -There are two methods for connecting to HPC-UGent infrastructure: -- Using a terminal to connect via SSH. -- Using the web portal diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt deleted file mode 100644 index 7b0a39279e4..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt +++ /dev/null @@ -1,18 +0,0 @@ -After you have uploaded your public key you will receive an e-mail with -a link to confirm your e-mail address. After confirming your e-mail -address the VSC staff will review and if applicable approve your -account. -Welcome e-mail -Within one day, you should receive a Welcome e-mail with your VSC -account details. -Dear (Username), -Your VSC-account has been approved by an administrator. -Your vsc-username is vsc40000 -Your account should be fully active within one hour. -To check or update your account information please visit -https://account.vscentrum.be/ -For further info please visit https://www.vscentrum.be/user-portal -Kind regards, --- The VSC administrators -Now, you can start using the HPC. You can always look up your VSC id later -by visiting . 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json deleted file mode 100644 index e417029c16f..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Welcome-e-mail", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "", - "previous_title": "account_paragraph_9", - "next_title": "account_paragraph_11", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/account/#welcome-e-mail" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt deleted file mode 100644 index 7ecd78e5c9f..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt +++ /dev/null @@ -1,14 +0,0 @@ -Computation Workflow on the HPC -A typical Computation workflow will be: -1. Connect to the HPC -2. Transfer your files to the HPC -3. Compile your code and test it -4. Create a job script -5. Submit your job -6. Wait while - 1. your job gets into the queue - 2. your job gets executed - 3. your job finishes -7. Move your results -We'll take you through the different tasks one by one in the following -chapters. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json deleted file mode 100644 index e43e729aa74..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Computation-Workflow-on-the-HPC", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 2, - "directory": "account", - "parent_title": "", - "previous_title": "account_paragraph_11", - "next_title": null, - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/account/#computation-workflow-on-the-hpc" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json deleted file mode 100644 index 52a3ef55568..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Getting-ready-to-request-an-account", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 2, - "directory": "account", - "links": { - "0": "https://docs.hpc.ugent.be/sites/hpc_policies", - "1": "https://docs.hpc.ugent.be/web_portal" - }, - "parent_title": "", - "previous_title": null, - "next_title": "account_paragraph_2", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/account/#getting-ready-to-request-an-account" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt deleted file mode 100644 index 9614ed1447c..00000000000 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt +++ /dev/null @@ -1,9 +0,0 @@ -The web portal offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). -If you would like use a terminal with SSH as this gives you more flexibility continue reading. -However if you prefer to use the web portal, you can skip ahead to the following section: Applying for the account. -Once you have successfully obtained an account, you can then delve into the details of utilizing the HPC-UGent web portal by reading Using the HPC-UGent web portal. -The HPC-UGent infrastructure clusters use public/private key pairs for user authentication -(rather than passwords). Technically, the private key is stored on your -local computer and always stays there; the public key is stored on the HPC. -Access to the HPC is granted to anyone who can prove to have access to the -corresponding private key on his local computer. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json deleted file mode 100644 index a41a1993674..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Getting-ready-to-request-an-account", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 2, - "directory": "account", - "links": { - "0": "https://docs.hpc.ugent.be/account/#applying-for-the-account", - "1": "https://docs.hpc.ugent.be/web_portal" - }, - "parent_title": "", - "previous_title": "account_paragraph_1", - "next_title": "account_paragraph_3", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/account/#getting-ready-to-request-an-account" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt deleted file mode 100644 index 963b35c090b..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt +++ /dev/null @@ -1,15 +0,0 @@ -How do SSH keys work? -- an SSH public/private key pair can be seen as a lock and a key -- the SSH public key is equivalent with a lock: you give it to the - VSC and they put it on the door that gives access to your account. -- the SSH private key is like a physical key: you don't hand it out - to other people. -- anyone who has the key (and the optional password) can unlock the - door and log in to the account. -- the door to your VSC account is special: it can have multiple - locks (SSH public keys) attached to it, and you only need to open - one lock with the corresponding key (SSH private key) to open - the door (log in to the account). 
-Since all VSC clusters use Linux as their main operating system, you -will need to get acquainted with using the command-line interface and -using the terminal (see tutorial). diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json deleted file mode 100644 index 4df622cc4aa..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "How-do-SSH-keys-work", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "links": { - "0": "https://docs.hpc.ugent.be/linux-tutorial" - }, - "parent_title": "", - "previous_title": "account_paragraph_2", - "next_title": "account_paragraph_4", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/account/#how-do-ssh-keys-work" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt deleted file mode 100644 index 6c5695dfff3..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt +++ /dev/null @@ -1,14 +0,0 @@ -Applying for the account -Visit -You will be redirected to our WAYF (Where Are You From) service where -you have to select your "Home Organisation". -Select "UGent" in the dropdown box and optionally select "Save my preference" -and "permanently". -Click "Confirm" -You will now be taken to the authentication page of your institute. -You will now have to log in with CAS using your UGent account. -You either have a login name of maximum 8 characters, or a (non-UGent) -email address if you are an external user. In case of problems with your -UGent password, please visit: . 
After -logging in, you may be requested to share your information. Click "Yes, -continue". diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json deleted file mode 100644 index 6a77c48dbd1..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Applying-for-the-account", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 2, - "directory": "account", - "parent_title": "", - "previous_title": "account_paragraph_7", - "next_title": "account_paragraph_9", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/account/#applying-for-the-account" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt deleted file mode 100644 index db1afd43e68..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt +++ /dev/null @@ -1,10 +0,0 @@ -Compiling and testing your software on the HPC -All nodes in the HPC cluster are running the "RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty)" -Operating system, which is a specific version of Red Hat Enterprise Linux. This means that all the -software programs -(executable) that the end-user wants to run on the HPC first must be -compiled for RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty). It also means that you first have to install all the -required external software packages on the HPC. -Most commonly used compilers are already pre-installed on the HPC and can be -used straight away. 
Also, many popular external software packages, which -are regularly used in the scientific community, are also pre-installed. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt deleted file mode 100644 index d49ba76b01a..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt +++ /dev/null @@ -1,19 +0,0 @@ -The "mpi_hello.c" program is a simple source file, written in C with MPI -library calls. -Then, check the command line options for *"mpicc" (GNU C-Compiler with -MPI extensions)*, then we compile and list the contents of the directory -again: -mpicc --help -mpicc -o mpihello mpihello.c -ls -l -A new file "hello" has been created. Note that this program has -"execute" rights. -Let's test this program on the "login" node first: -$ ./mpihello -Hello World from Node 0. -It seems to work, now run it on the HPC. -qsub mpihello.pbs -Compiling a parallel program in Intel Parallel Studio Cluster Edition -We will now compile the same program, but using the Intel Parallel -Studio Cluster Edition compilers. 
We stay in the examples directory for -this chapter: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json deleted file mode 100644 index ca0d7d80669..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition", - "title_depth": 3, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_9", - "next_title": "compiling_your_software_paragraph_11", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt deleted file mode 100644 index be02d069ac7..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt +++ /dev/null @@ -1,20 +0,0 @@ -cd ~/examples/Compiling-and-testing-your-software-on-the-HPC -We will compile this C/MPI -file into an executable with the Intel -Parallel Studio Cluster Edition. First, clear the modules (purge) and -then load the latest "intel" module: -module purge -module load intel -Then, compile and list the contents of the directory again. The Intel -equivalent of mpicc is mpiicc. -mpiicc -o mpihello mpihello.c -ls -l -Note that the old "mpihello" file has been overwritten. 
Let's test this -program on the "login" node first: -$ ./mpihello -Hello World from Node 0. -It seems to work, now run it on the HPC. -qsub mpihello.pbs -Note: The AUGent only has a license for the Intel Parallel Studio Cluster -Edition for a fixed number of users. As such, it might happen that you -have to wait a few minutes before a floating license becomes available -for your use. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json deleted file mode 100644 index 808331a3f9d..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition", - "title_depth": 3, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_10", - "next_title": "compiling_your_software_paragraph_12", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt deleted file mode 100644 index 1d37014a426..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt +++ /dev/null @@ -1,9 +0,0 @@ -Note: The Intel Parallel Studio Cluster Edition contains equivalent -compilers for all GNU compilers. 
Hereafter the overview for C, C++ and -Fortran compilers. -| | Sequential Program | | **Parallel Program (with MPI)** | | -|-------------|------------------------|-----------|---------------------------------|-----------| -| | GNU | Intel | GNU | Intel | -| C | gcc | icc | mpicc | mpiicc | -| **C++** | g++ | icpc | mpicxx | mpiicpc | -| Fortran | gfortran | ifort | mpif90 | mpiifort | \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json deleted file mode 100644 index d032428daf1..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition", - "title_depth": 3, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_11", - "next_title": null, - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json deleted file mode 100644 index ec4b55c9a4d..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-and-testing-your-software-on-the-HPC", - 
"title_depth": 1, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": null, - "next_title": "compiling_your_software_paragraph_2", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-testing-your-software-on-the-hpc" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt deleted file mode 100644 index b52639b649d..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt +++ /dev/null @@ -1,13 +0,0 @@ -Check the pre-installed software on the HPC -In order to check all the available modules and their version numbers, -which are pre-installed on the HPC enter: -When your required application is not available on the HPC please contact -any HPC member. Be aware of potential "License Costs". "Open Source" -software is often preferred. -Porting your code -To port a software-program is to translate it from the operating system in -which it was developed (e.g., Windows 7) to another operating system -(e.g., Red Hat Enterprise Linux on our HPC) so that it can be used there. Porting implies some -degree of effort, but not nearly as much as redeveloping the program in -the new environment. It all depends on how "portable" you wrote your -code. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json deleted file mode 100644 index 00750c81d97..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Porting-your-code", - "title_depth": 2, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_1", - "next_title": "compiling_your_software_paragraph_3", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#porting-your-code" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt deleted file mode 100644 index f994f0bc148..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt +++ /dev/null @@ -1,13 +0,0 @@ -In the simplest case the file or files may simply be copied from one -machine to the other. However, in many cases the software is installed -on a computer in a way, which depends upon its detailed hardware, -software, and setup, with device drivers for particular devices, using -installed operating system and supporting software components, and using -different directories. 
-In some cases software, usually described as "portable software" is -specifically designed to run on different computers with compatible -operating systems and processors without any machine-dependent -installation; it is sufficient to transfer specified directories and -their contents. Hardware- and software-specific information is often -stored in configuration files in specified locations (e.g., the registry -on machines running MS Windows). diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json deleted file mode 100644 index 90e7d236beb..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Porting-your-code", - "title_depth": 2, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_2", - "next_title": "compiling_your_software_paragraph_4", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#porting-your-code" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt deleted file mode 100644 index f7bf4172b71..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt +++ /dev/null @@ -1,15 +0,0 @@ -Software, which is not portable in this sense, will have to be -transferred with modifications to support the environment on the -destination machine. 
-Whilst programming, it would be wise to stick to certain standards -(e.g., ISO/ANSI/POSIX). This will ease the porting of your code to other -platforms. -Porting your code to the RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty) platform is the responsibility of the end-user. -Compiling and building on the HPC -Compiling refers to the process of translating code written in some -programming language, e.g., Fortran, C, or C++, to machine code. -Building is similar, but includes gluing together the machine code -resulting from different source files into an executable (or library). -The text below guides you through some basic problems typical for small -software projects. For larger projects it is more appropriate to use -makefiles or even an advanced build system like CMake. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json deleted file mode 100644 index b7c9ef0f71b..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-and-building-on-the-HPC", - "title_depth": 2, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_3", - "next_title": "compiling_your_software_paragraph_5", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt deleted file 
mode 100644 index 342262b9264..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt +++ /dev/null @@ -1,16 +0,0 @@ -All the HPC nodes run the same version of the Operating System, i.e. RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty). So, -it is sufficient to compile your program on any compute node. Once you -have generated an executable with your compiler, this executable should -be able to run on any other compute-node. -A typical process looks like: -1. Copy your software to the login-node of the HPC -2. Start an interactive session on a compute node; -3. Compile it; -4. Test it locally; -5. Generate your job scripts; -6. Test it on the HPC -7. Run it (in parallel); -We assume you've copied your software to the HPC. The next step is to request -your private compute node. -$ qsub -I -qsub: waiting for job 123456 to start diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json deleted file mode 100644 index 02a8fad0ae2..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-and-building-on-the-HPC", - "title_depth": 2, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_4", - "next_title": "compiling_your_software_paragraph_6", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt deleted file mode 100644 index 7ebde664878..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt +++ /dev/null @@ -1,30 +0,0 @@ -Compiling a sequential program in C -Go to the examples for chapter -Compiling and testing your software on the HPC and load the -foss module: -cd ~/examples/Compiling-and-testing-your-software-on-the-HPC -module load foss -We now list the directory and explore the contents of the "hello.c" -program: -$ ls -l -total 512 --rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c --rw-r--r-- 1 vsc40000 130 Sep 16 11:39 hello.pbs* --rw-r--r-- 1 vsc40000 359 Sep 16 13:55 mpihello.c --rw-r--r-- 1 vsc40000 304 Sep 16 13:55 mpihello.pbs -/* - * VSC : Flemish Supercomputing Centre - * Tutorial : Introduction to HPC - * Description: Print 500 numbers, whilst waiting 1 second in between - */ -#include "stdio.h" -int main( int argc, char *argv[] ) -{ - int i; - for (i=0; i<500; i++) - { - printf("Hello #%d\n", i); - fflush(stdout); - sleep(1); - } -} diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json deleted file mode 100644 index 16942249583..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-a-sequential-program-in-C", - "title_depth": 3, - "directory": "compiling_your_software", - "links": { - "0": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc" - }, - "parent_title": "", - "previous_title": 
"compiling_your_software_paragraph_5", - "next_title": "compiling_your_software_paragraph_7", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt deleted file mode 100644 index 1d58d0d6ae4..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt +++ /dev/null @@ -1,15 +0,0 @@ -The "hello.c" program is a simple source file, written in C. It'll print -500 times "Hello #<num>", and waits one second between 2 printouts. -We first need to compile this C-file into an executable with the -gcc-compiler. -First, check the command line options for *"gcc" (GNU C-Compiler)*, then -we compile. the O2 option enables a moderate level of optimization when compiling the code. -It instructs the compiler to optimize the code for better performance without significantly increasing compilation time. 
-Finally, list the contents of the directory again: -$ gcc -help -$ gcc -O2 -o hello hello.c -$ ls -l -total 512 --rwxrwxr-x 1 vsc40000 7116 Sep 16 11:43 hello* --rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c --rwxr-xr-x 1 vsc40000 130 Sep 16 11:39 hello.pbs* diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json deleted file mode 100644 index e5f3161c3f2..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-a-sequential-program-in-C", - "title_depth": 3, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_6", - "next_title": "compiling_your_software_paragraph_8", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt deleted file mode 100644 index 5ca5de1e6d4..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt +++ /dev/null @@ -1,19 +0,0 @@ -A new file "hello" has been created. Note that this file has "execute" -rights, i.e., it is an executable. More often than not, calling gcc -- -or any other compiler for that matter -- will provide you with a list of -errors and warnings referring to mistakes the programmer made, such as -typos, syntax errors. 
You will have to correct them first in order to -make the code compile. Warnings pinpoint less crucial issues that may -relate to performance problems, using unsafe or obsolete language -features, etc. It is good practice to remove all warnings from a -compilation process, even if they seem unimportant so that a code change -that produces a warning does not go unnoticed. -Let's test this program on the local compute node, which is at your -disposal after the qsub --I command: -$ ./hello -Hello #0 -Hello #1 -Hello #2 -Hello #3 -Hello #4 -... diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json deleted file mode 100644 index 942949951d1..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-a-sequential-program-in-C", - "title_depth": 3, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_7", - "next_title": "compiling_your_software_paragraph_9", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt deleted file mode 100644 index 28982d2bd95..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt +++ /dev/null @@ -1,32 +0,0 @@ -It seems to work, now run it on the HPC -qsub 
hello.pbs -Compiling a parallel program in C/MPI -cd ~/examples/Compiling-and-testing-your-software-on-the-HPC -List the directory and explore the contents of the "mpihello.c" -program: -$ ls -l -total 512 -total 512 --rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c --rw-r--r-- 1 vsc40000 130 Sep 16 11:39 hello.pbs* --rw-r--r-- 1 vsc40000 359 Sep 16 13:55 mpihello.c --rw-r--r-- 1 vsc40000 304 Sep 16 13:55 mpihello.pbs -/* - * VSC : Flemish Supercomputing Centre - * Tutorial : Introduction to HPC - * Description: Example program, to compile with MPI - */ -#include -#include -main(int argc, char **argv) -{ - int node, i, j; - float f; - MPI_Init(&argc,&argv); - MPI_Comm_rank(MPI_COMM_WORLD, &node); - - printf("Hello World from Node %d.\n", node); - for (i=0; i<=100000; i++) - f=i*2.718281828*i+i+i*3.141592654; - MPI_Finalize(); -} diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json deleted file mode 100644 index fe51e423a96..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-a-parallel-program-in-CMPI", - "title_depth": 3, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_8", - "next_title": "compiling_your_software_paragraph_10", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-cmpi" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt deleted file mode 
100644 index bc5a1f80140..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt +++ /dev/null @@ -1,18 +0,0 @@ -Connecting to the HPC infrastructure -Before you can really start using the HPC clusters, there are several things -you need to do or know: -1. You need to log on to the cluster using an SSH client to one of - the login nodes or by using the HPC web portal. - This will give you command-line access. - A standard web browser like Firefox or Chrome for the web portal will suffice. -2. Before you can do some work, you'll have to transfer the files - that you need from your desktop computer to the cluster. At the end - of a job, you might want to transfer some files back. -3. Optionally, if you wish to use programs with a **graphical user - interface**, you will need an X-server on your client system and log - in to the login nodes with X-forwarding enabled. -4. Often several versions of software packages and libraries are - installed, so you need to select the ones you need. To manage - different versions efficiently, the VSC clusters use so-called - modules, so you will need to select and load the modules that - you need. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt deleted file mode 100644 index 5c715d218a1..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt +++ /dev/null @@ -1,24 +0,0 @@ -You can exit the connection at anytime by entering: -$ exit -logout -Connection to login.hpc.ugent.be closed. - tip "tip: Setting your Language right" - You may encounter a warning message similar to the following one during connecting: - perl: warning: Setting locale failed. 
- perl: warning: Please check that your locale settings: - LANGUAGE = (unset), - LC_ALL = (unset), - LC_CTYPE = "UTF-8", - LANG = (unset) - are supported and installed on your system. - perl: warning: Falling back to the standard locale ("C"). - or any other error message complaining about the locale. - This means that the correct "locale" has not yet been properly specified on your local machine. Try: - LANG= - LC_COLLATE="C" - LC_CTYPE="UTF-8" - LC_MESSAGES="C" - LC_MONETARY="C" - LC_NUMERIC="C" - LC_TIME="C" - LC_ALL= diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json deleted file mode 100644 index 96a1f9cee80..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "", - "previous_title": "connecting_paragraph_9", - "next_title": "connecting_paragraph_11", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt deleted file mode 100644 index df00d4ed2a4..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt +++ /dev/null @@ -1,7 +0,0 @@ -Fast file transfer for large datasets -See the section on rsync in chapter 5 of the Linux intro manual. -Changing login nodes -It can be useful to have control over which login node you are on. 
However, when you connect to the HPC (High-Performance Computing) system, you are directed to a random login node, which might not be the one where you already have an active session. To address this, there is a way to manually switch your active login node. -For instance, if you want to switch to the login node named gligar07.gastly.os, you can use the following command while you are connected to the gligar08.gastly.os login node on the HPC: -ssh gligar07.gastly.os -This is also possible the other way around. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json deleted file mode 100644 index 74ea0125d71..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Changing-login-nodes", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "links": { - "0": "https://docs.hpc.ugent.be/linux-tutorial/uploading_files/#copying-faster-with-rsync" - }, - "parent_title": "", - "previous_title": "connecting_paragraph_14", - "next_title": "connecting_paragraph_16", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt deleted file mode 100644 index dd4f3269fb5..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt +++ /dev/null @@ -1,11 +0,0 @@ -If you want to find out which login host you are connected to, you can use the hostname command. 
-$ hostname -gligar07.gastly.os -$ ssh gligar08.gastly.os -$ hostname -gligar08.gastly.os -Rather than always starting a new session on the HPC, you can also use a terminal multiplexer like screen or tmux. -These can make sessions that 'survives' across disconnects. -You can find more information on how to use these tools here (or on other online sources): -- screen -- tmux \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json deleted file mode 100644 index 623be877f5b..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Changing-login-nodes", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "links": { - "0": "https://www.howtogeek.com/662422/how-to-use-linuxs-screen-command/", - "1": "https://www.howtogeek.com/671422/how-to-use-tmux-on-linux-and-why-its-better-than-screen/" - }, - "parent_title": "", - "previous_title": "connecting_paragraph_15", - "next_title": null, - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json deleted file mode 100644 index 783e60c1ab5..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Connecting-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 1, - "directory": 
"connecting", - "links": { - "0": "https://docs.hpc.ugent.be/web_portal" - }, - "parent_title": "", - "previous_title": null, - "next_title": "connecting_paragraph_2", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#connecting-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt deleted file mode 100644 index 49c4572f3b2..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt +++ /dev/null @@ -1,18 +0,0 @@ -Connection restrictions -Since March 20th 2020, restrictions are in place that limit from where -you can connect to the VSC HPC infrastructure, in response to security -incidents involving several European HPC centres. -VSC login nodes are only directly accessible from within university -networks, and from (most) Belgian commercial internet providers. -All other IP domains are blocked by default. If you are connecting from -an IP address that is not allowed direct access, you have the following -options to get access to VSC login nodes: -- Use an VPN connection to connect to UGent the network (recommended). See for more information. -- Whitelist your IP address automatically by accessing - and log in with your UGent account. - - While this web connection is active new SSH sessions can be - started. - - Active SSH sessions will remain active even when this web page - is closed. -- Contact your HPC support team (via hpc@ugent.be) and ask them to whitelist your - IP range (e.g., for industry access, automated processes). 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json deleted file mode 100644 index 10f3e042d9a..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Connection-restrictions", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "", - "previous_title": "connecting_paragraph_1", - "next_title": "connecting_paragraph_3", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#connection-restrictions" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt deleted file mode 100644 index db490973b7f..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt +++ /dev/null @@ -1,9 +0,0 @@ -Trying to establish an SSH connection from an IP address that does not -adhere to these restrictions will result in an immediate failure to -connect, with an error message like: -ssh_exchange_identification: read: Connection reset by peer -First Time connection to the HPC infrastructure -The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. -If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. -If you have any issues connecting to the HPC after you've followed these -steps, see Issues connecting to login node to troubleshoot. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json deleted file mode 100644 index 8d6b1696e08..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "links": { - "0": "https://docs.hpc.ugent.be/web_portal", - "1": "https://docs.hpc.ugent.be/troubleshooting/#issues-connecting-to-login-node" - }, - "parent_title": "", - "previous_title": "connecting_paragraph_2", - "next_title": "connecting_paragraph_4", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt deleted file mode 100644 index 862e6952252..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt +++ /dev/null @@ -1,14 +0,0 @@ -Congratulations, you're on the HPC infrastructure now! -To find out where you have landed you can print the current working directory: -$ pwd -/user/home/gent/vsc400/vsc40000 -Your new private home directory is "/user/home/gent/vsc400/vsc40000". Here you can create your own -subdirectory structure, copy and prepare your applications, compile and -test them and submit your jobs on the HPC. -$ cd /apps/gent/tutorials -$ ls -Intro-HPC/ -This directory currently contains all training material for the Introduction to the HPC. 
More -relevant training material to work with the HPC can always be added later in -this directory. -You can now explore the content of this directory with the "ls --l" (lists long) and the "cd" (change directory) commands: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json deleted file mode 100644 index 66b2a89fbb1..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "", - "previous_title": "connecting_paragraph_5", - "next_title": "connecting_paragraph_7", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt deleted file mode 100644 index aa590b9b269..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt +++ /dev/null @@ -1,21 +0,0 @@ -As we are interested in the use of the HPC, move further to Intro-HPC and explore the -contents up to 2 levels deep: -$ cd Intro-HPC -$ tree -L 2 -. 
-'-- examples - |-- Compiling-and-testing-your-software-on-the-HPC - |-- Fine-tuning-Job-Specifications - |-- Multi-core-jobs-Parallel-Computing - |-- Multi-job-submission - |-- Program-examples - |-- Running-batch-jobs - |-- Running-jobs-with-input - |-- Running-jobs-with-input-output-data - |-- example.pbs - '-- example.sh -9 directories, 5 files -This directory contains: -1. This HPC Tutorial (in either a Mac, Linux or Windows version). -2. An examples subdirectory, containing all the examples that you need in this - Tutorial, as well as examples that might be useful for your specific applications. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json deleted file mode 100644 index 6e3f90fbe8a..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "", - "previous_title": "connecting_paragraph_6", - "next_title": "connecting_paragraph_8", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt deleted file mode 100644 index 634df6034b1..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt +++ /dev/null @@ -1,12 +0,0 @@ -cd examples - tip - Typing cd ex followed by tab (the Tab-key) will generate the cd examples - command. 
Command-line completion (also tab completion) is a common feature of the bash command - line interpreter, in which the program automatically fills in partially - typed commands. - tip - For more exhaustive tutorials about Linux usage, see Appendix Useful Linux Commands -The first action is to copy the contents of the HPC examples directory to -your home directory, so that you have your own personal copy and that -you can start using the examples. The "-r" option of the copy command -will also copy the contents of the sub-directories "recursively". diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json deleted file mode 100644 index 38f265cfdcd..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "links": { - "0": "https://docs.hpc.ugent.be/useful_linux_commands" - }, - "parent_title": "", - "previous_title": "connecting_paragraph_7", - "next_title": "connecting_paragraph_9", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt deleted file mode 100644 index ad2fee7457f..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt +++ /dev/null @@ -1,19 +0,0 @@ -cp -r /apps/gent/tutorials/Intro-HPC/examples ~/ -Go to your home directory, check your own private examples 
directory, ...Ā and start working. -cd -ls -l -Upon connecting you will see a login message containing your last login time stamp and a basic overview of the current cluster utilisation. -Last login: Thu Mar 18 13:15:09 2021 from gligarha02.gastly.os - STEVIN HPC-UGent infrastructure status on Mon, 19 Feb 2024 10:00:01 - cluster - full - free - part - total - running - queued - nodes nodes free nodes jobs jobs - ------------------------------------------------------------------------- - skitty 39 0 26 68 1839 5588 - joltik 6 0 1 10 29 18 - doduo 22 0 75 128 1397 11933 - accelgor 4 3 2 9 18 1 - donphan 0 0 16 16 16 13 - gallade 2 0 5 16 19 136 -For a full view of the current loads and queues see: -https://hpc.ugent.be/clusterstate/ -Updates on current system status and planned maintenance can be found on https://www.ugent.be/hpc/en/infrastructure/status diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json deleted file mode 100644 index bd1d462e614..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "", - "previous_title": "connecting_paragraph_8", - "next_title": "connecting_paragraph_10", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt deleted file mode 100644 index 
dfc59211792..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt +++ /dev/null @@ -1,17 +0,0 @@ -Adding multiple SSH public keys (optional) -In case you are connecting from different computers to the login nodes, -it is advised to use separate SSH public keys to do so. You should -follow these steps. -1. Create a new public/private SSH key pair from the new computer. - Repeat the process described in - sectionĀ Generate a public/private key pair with OpenSSH. -2. Go to -3. Upload the new SSH public key using the Add public key section. Make sure that your - public key is actually saved, because a public key will be refused - if it is too short, wrong type, or in a wrong format. -4. (optional) If you lost your key, you can delete the old key on the - same page. You should keep at least one valid public SSH key in your - account. -5. Take into account that it will take some time before the new SSH - public key is active in your account on the system; waiting for - 15-30 minutes should be sufficient. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json deleted file mode 100644 index ffdeaf550e0..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Adding-multiple-SSH-public-keys-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Applying-for-the-account", - "links": { - "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair-with-openssh" - }, - "previous_title": "account_paragraph_10", - "next_title": "account_paragraph_12", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#adding-multiple-ssh-public-keys-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt deleted file mode 100644 index caaaea5ee91..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt +++ /dev/null @@ -1,14 +0,0 @@ -How do SSH keys work -Launch a terminal from your desktop's application menu and you will see -the bash shell. There are other shells, but most Linux distributions use -bash by default. -Test OpenSSH -Secure Shell (ssh) is a cryptographic network protocol for secure data -communication, remote command-line login, remote command execution, and -other secure network services between two networked computers. 
In short, -ssh provides a secure connection between 2 computers via insecure -channels (Network, Internet, telephone lines, ...). -"Secure" means that: -1. the User is authenticated to the System; and -2. the System is authenticated to the User; and -3. all data is encrypted during transfer. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json deleted file mode 100644 index 7654a65253a..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Test-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_paragraph_4", - "next_title": "account_linux_paragraph_5.2", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#test-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt deleted file mode 100644 index 318f913fba3..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt +++ /dev/null @@ -1,14 +0,0 @@ -OpenSSH is a FREE implementation of the SSH connectivity protocol. comes -with its own implementation of OpenSSH, so you don't need to install any -third-party software to use it. Just open a terminal window and jump in! -On all popular Linux distributions, the OpenSSH software is readily -available, and most often installed by default. 
You can check whether -the OpenSSH software is installed by opening a terminal and typing: -$ ssh -V -OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017 -To access the clusters and transfer your files, you will use the -following commands: -1. ssh-keygen: to generate the SSH key pair (public + private key); -2. ssh: to open a shell on a remote machine; -3. sftp: a secure equivalent of ftp; -4. scp: a secure equivalent of the remote copy command rcp. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json deleted file mode 100644 index 32f1120307f..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Test-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_linux_paragraph_5.1", - "next_title": "account_linux_paragraph_5.3", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#test-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt deleted file mode 100644 index 5df90a3dd7c..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt +++ /dev/null @@ -1,16 +0,0 @@ -Generate a public/private key pair with OpenSSH -A key pair might already be present in the default location inside your -home directory. 
Therefore, we first check if a key is available with the -"list short" ("ls") command: -ls ~/.ssh -If a key-pair is already available, you would normally get: -authorized_keys id_rsa id_rsa.pub known_hosts -Otherwise, the command will show: -ls: .ssh: No such file or directory -You can recognise a public/private key pair when a pair of files has the -same name except for the extension ".pub" added to one of them. In this -particular case, the private key is "id_rsa" and public key is -"id_rsa.pub". You may have multiple keys (not necessarily in the -directory "~/.ssh") if you or your operating system requires this. Be -aware that your existing key pair might be too short, or not the right -type. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json deleted file mode 100644 index 722ba1a2ad4..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_linux_paragraph_5.2", - "next_title": "account_linux_paragraph_5.4", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt deleted file mode 100644 index d29d61d27d9..00000000000 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt +++ /dev/null @@ -1,13 +0,0 @@ -You will need to generate a new key pair, when: -1. you don't have a key pair yet -2. you forgot the passphrase protecting your private key -3. your private key was compromised -4. your key pair is too short or not the right type -For extra security, the private key itself can be encrypted using a -"passphrase", to prevent anyone from using your private key even when -they manage to copy it. You have to "unlock" the private key by typing -the passphrase. Be sure to never give away your private key, it is -private and should stay private. You should not even copy it to one of -your other machines, instead, you should create a new public/private key -pair for each machine. -ssh-keygen -t rsa -b 4096 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json deleted file mode 100644 index 4f65f6ebf36..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_linux_paragraph_5.3", - "next_title": "account_linux_paragraph_5.5", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt deleted file mode 100644 index 78c142e82e0..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt +++ /dev/null @@ -1,6 +0,0 @@ -This will ask you for a file name to store the private and public key, -and a passphrase to protect your private key. It needs to be emphasised -that you really should choose the passphrase wisely! The system will ask -you for it every time you want to use the private key that is every time -you want to access the cluster or transfer your files. -Without your key pair, you won't be able to apply for a personal VSC account. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json deleted file mode 100644 index 468fb5d0938..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_linux_paragraph_5.4", - "next_title": "account_paragraph_6", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt deleted file mode 100644 index c3b395b5296..00000000000 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt +++ /dev/null @@ -1 +0,0 @@ -Using an SSH agent (optional) \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json deleted file mode 100644 index fb82c40a7d7..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_paragraph_5", - "next_title": "account_paragraph_7", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt deleted file mode 100644 index 8e8429c1642..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt +++ /dev/null @@ -1,14 +0,0 @@ -Using an SSH agent (optional) -Most recent Unix derivatives include by default an SSH agent ("gnome-keyring-daemon" in most cases) -to keep and manage the user SSH keys. If you use one of these derivatives you must include the new keys into -the SSH manager keyring to be able to connect to the HPC cluster. If -not, SSH client will display an error message (see Connecting) similar to this: -Agent admitted failure to sign using the key. 
-Permission denied (publickey,gssapi-keyex,gssapi-with-mic). -This could be fixed using the ssh-add command. You can include the new -private keys' identities in your keyring with: -ssh-add - tip - Without extra options ssh-add adds any key located at $HOME/.ssh - directory, but you can specify the private key location path as - argument, as example: ssh-add /path/to/my/id_rsa. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json deleted file mode 100644 index 2b3633d71e7..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/connecting" - }, - "previous_title": "account_paragraph_6", - "next_title": "account_linux_paragraph_7.2", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt deleted file mode 100644 index c227dbbb6e2..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt +++ /dev/null @@ -1,8 +0,0 @@ -Check that your key is available from the keyring with: -ssh-add -l -After these changes the key agent will keep your SSH key to connect to -the clusters as usual. 
- tip - You should execute ssh-add command again if you generate a new SSH - key. -Visit for more information. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json deleted file mode 100644 index de9700c7a5b..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_linux_paragraph_7.1", - "next_title": "account_paragraph_8", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt deleted file mode 100644 index 815c414e059..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt +++ /dev/null @@ -1,7 +0,0 @@ -Applying for the account -After you log in using your UGent login and password, you will be asked to -upload the file that contains your public key, i.e., the file -"id_rsa.pub" which you have generated earlier. Make sure that your -public key is actually accepted for upload, because if it is in a wrong -format, wrong type or too short, then it will be refused. -This file has been stored in the directory "~/.ssh/". 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json deleted file mode 100644 index 31c14d853b3..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Applying-for-the-account", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 2, - "directory": "account", - "parent_title": "account", - "previous_title": "account_paragraph_8", - "next_title": "account_paragraph_10", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#applying-for-the-account" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt deleted file mode 100644 index 1d912924535..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt +++ /dev/null @@ -1,37 +0,0 @@ -First Time connection to the HPC infrastructure - A locale is a set of parameters that defines the user's language, country and - any special variant preferences that the user wants to see in their user - interface. Usually a locale identifier consists of at least a language - identifier and a region identifier. - Note - If you try to set a non-supported locale, then it will be automatically - set to the default. Currently the default is en_US.UFT-8 or en_US, - depending on whether your originally (non-supported) locale was UTF-8 or not. - Open the .bashrc on your local machine with your favourite editor and - add the following lines: - - $ nano ~/.bashrc - ... 
- export LANGUAGE="en_US.UTF-8" - export LC_ALL="en_US.UTF-8" - export LC_CTYPE="en_US.UTF-8" - export LANG="en_US.UTF-8" - ... - - tip "tip: vi" - To start entering text in vi: move to the place you want to start - entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq" - To exit vi without saving your changes, enter ""ESC":q!" - - - or alternatively (if you are not comfortable with the Linux editors), - again on your local machine: - - echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile - echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile - echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile - echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile - - You can now log out, open a new terminal/shell on your local machine and - reconnect to the login node, and you should not get these warnings anymore. - \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json deleted file mode 100644 index ef14b084e5f..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "previous_title": "connecting_paragraph_10", - "next_title": "connecting_paragraph_12", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt deleted file mode 100644 index d872c89a0f8..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt +++ /dev/null @@ -1,6 +0,0 @@ -Transfer Files to/from the HPC -Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. -The preferred way to transfer files is by using an scp or sftp via the -secure OpenSSH protocol. ships with an implementation of OpenSSH, so you -don't need to install any third-party software to use it. Just open a -terminal window and jump in! diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json deleted file mode 100644 index 081156a5d16..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Transfer-Files-tofrom-the-HPC", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "previous_title": "connecting_paragraph_11", - "next_title": "connecting_paragraph_13", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#transfer-files-tofrom-the-hpc" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt deleted file mode 100644 index 8d0031fcca9..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt +++ /dev/null @@ -1,12 +0,0 @@ -Transfer Files tofrom the HPC -Using scp -Secure copy or SCP is a tool (command) for securely transferring files between a local -host (= your computer) and a remote host (the HPC). It is based on the -Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e., -copy) command, but can copy files to or from remote machines. -It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if -you have symlinks to them in your home directory. See -the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux - for how to do this. -Open an additional terminal window and check that you're working on your -local machine. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json deleted file mode 100644 index 6b70790e1e3..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "links": { - "0": "https://docs.hpc.ugent.be/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" - }, - "previous_title": "connecting_paragraph_12", - "next_title": "connecting_linux_paragraph_13.2", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt deleted file mode 100644 index f1da0677a67..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt +++ /dev/null @@ -1,17 +0,0 @@ -$ hostname - -If you're still using the terminal that is connected to the HPC, close the -connection by typing "exit" in the terminal window. -For example, we will copy the (local) file "localfile.txt" to your -home directory on the HPC cluster. We first generate a small dummy -"localfile.txt", which contains the word "Hello". Use your own VSC -account, which is something like "vsc40000". 
Don't forget the colon (:) at the -end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your -local filesystem. You can even specify where to save the file on the -remote filesystem by putting a path after the colon. -$ echo "Hello" > localfile.txt -$ ls -l -... --rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt -$ scp localfile.txt vsc40000@login.hpc.ugent.be: -localfile.txt 100% 6 0.0KB/s 00:00 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json deleted file mode 100644 index 43affa4e36c..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_linux_paragraph_13.1", - "next_title": "connecting_linux_paragraph_13.3", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt deleted file mode 100644 index 9585900e356..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt +++ /dev/null @@ -1,22 +0,0 @@ -Connect to the HPC via another terminal, print the working directory (to -make sure you're in the home directory) and check whether the file has -arrived: -$ pwd -/user/home/gent/vsc400/vsc40000 -$ ls -l -total 1536 
-drwxrwxr-x 2 -drwxrwxr-x 2 -drwxrwxr-x 10 --rw-r--r-- 1 -$ cat localfile.txt -Hello -The scp command can also be used to copy files from the cluster to your -local machine. Let us copy the remote file "intro-HPC--Gent.pdf" from your "docs" -subdirectory on the cluster to your local computer. -First, we will confirm that the file is indeed in the "docs" -subdirectory. In the terminal on the login node, enter: -$ cd ~/docs -$ ls -l -total 1536 --rw-r--r-- 1 vsc40000 Sep 11 09:53 intro-HPC--Gent.pdf diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json deleted file mode 100644 index ccc74bb5b94..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_linux_paragraph_13.2", - "next_title": "connecting_linux_paragraph_13.4", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt deleted file mode 100644 index d09b69552ef..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt +++ /dev/null @@ -1,14 +0,0 @@ -Now we will copy the file to the local machine. 
On the terminal on your -own local computer, enter: -$ scp vsc40000@login.hpc.ugent.be:./docs/intro-HPC--Gent.pdf . -intro-HPC--Gent.pdf 100% 725KB 724.6KB/s 00:01 -$ ls -l -total 899 --rw-r--r-- 1 user staff 741995 Sep 18 09:53 --rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt -The file has been copied from the HPC to your local computer. -It's also possible to copy entire directories (and their contents) with -the -r flag. For example, if we want to copy the local directory -dataset to $VSC_SCRATCH, we can use the following command (assuming -you've created the scratch symlink): -scp -r dataset vsc40000@login.hpc.ugent.be:scratch diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json deleted file mode 100644 index 9ffcc4121f4..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_linux_paragraph_13.3", - "next_title": "connecting_linux_paragraph_13.5", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt deleted file mode 100644 index 532d57bb4a5..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt +++ /dev/null @@ -1,14 +0,0 @@ -If you 
don't use the -r option to copy a directory, you will run into -the following error: -$ scp dataset vsc40000@login.hpc.ugent.be:scratch -dataset: not a regular file -Using sftp -The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file -transfer and file management functionalities over any reliable data -stream. It was designed as an extension of the Secure Shell protocol -(SSH) version 2.0. This protocol assumes that it is run over a secure -channel, such as SSH, that the server has already authenticated the -client, and that the identity of the client user is available to the -protocol. -The sftp is an equivalent of the ftp command, with the difference that -it uses the secure ssh protocol to connect to the clusters. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json deleted file mode 100644 index 8e3b4056b6b..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-sftp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_linux_paragraph_13.4", - "next_title": "connecting_linux_paragraph_13.6", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt deleted file mode 100644 index 
1ef13b80c6f..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt +++ /dev/null @@ -1,18 +0,0 @@ -One easy way of starting a sftp session is -sftp vsc40000@login.hpc.ugent.be -Typical and popular commands inside an sftp session are: -| | | -|:--------------------------|:-------------------------------------------------------------------------------------| -| cd ~/exmples/fibo | Move to the examples/fibo subdirectory on the (i.e., the HPC remote machine) | -| ls | Get a list of the files in the current directory on the HPC. | -| get fibo.py | Copy the file "fibo.py" from the HPC | -| get tutorial/HPC.pdf | Copy the file "HPC.pdf" from the HPC, which is in the "tutorial" subdirectory. | -| lcd test | Move to the "test" subdirectory on your local machine. | -| lcd .. | Move up one level in the local directory. | -| lls | Get local directory listing. | -| put test.py | Copy the local file test.py to the HPC. | -| put test1.py test2.py | Copy the local file test1.py to the and rename it to test2.py. | -| bye | Quit the sftp session | -| **mget *.cc** | Copy all the remote files with extension ".cc" to the local directory. | -| **mput *.h** | Copy all the local files with extension ".h" to the HPC. 
| -| | | diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json deleted file mode 100644 index c7fe6bf6a44..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-sftp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "links": { - "0": "https://docs.hpc.ugent.be/", - "1": "https://docs.hpc.ugent.be/" - }, - "previous_title": "connecting_linux_paragraph_13.5", - "next_title": "connecting_linux_paragraph_13.7", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt deleted file mode 100644 index a0496edfb14..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt +++ /dev/null @@ -1,10 +0,0 @@ -Transfer Files tofrom the HPC -Using a GUI -If you prefer a GUI to transfer files back and forth to the HPC, you can -use your file browser. Open your file browser and press -++"Ctrl"+"l"++ -This should open up a address bar where you can enter a URL. -Alternatively, look for the "connect to server" option in your file -browsers menu. -Enter: sftp://vsc40000@login.hpc.ugent.be/ and press enter. -You should now be able to browse files on the HPC in your file browser. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json deleted file mode 100644 index e3c48fe4829..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-a-GUI", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_paragraph_13", - "next_title": "connecting_paragraph_15", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-a-gui" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt deleted file mode 100644 index 27ae3fb7bd4..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt +++ /dev/null @@ -1,12 +0,0 @@ -First Time connection to the HPC infrastructure -Connect -Open up a terminal and enter the following command to connect to the HPC. -ssh vsc40000@login.hpc.ugent.be -Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login -node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command. -The first time you make a connection to the login node, you will be -asked to verify the authenticity of the login node. Please check -Warning message when first connecting to new host on how to do this. 
-A possible error message you can get if you previously saved your -private key somewhere else than the default location -($HOME/.ssh/id_rsa): diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json deleted file mode 100644 index 66c5dc4aeff..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Connect", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "links": { - "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host" - }, - "previous_title": "connecting_paragraph_4", - "next_title": "connecting_linux_paragraph_5.2", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#connect" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt deleted file mode 100644 index be01e09bba0..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt +++ /dev/null @@ -1,4 +0,0 @@ -Permission denied (publickey,gssapi-keyex,gssapi-with-mic). -In this case, use the -i option for the ssh command to specify the -location of your private key. 
For example: -ssh -i /home/example/my_keys diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json deleted file mode 100644 index 21b63518804..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Connect", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "previous_title": "connecting_linux_paragraph_5.1", - "next_title": "connecting_paragraph_6", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#connect" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt deleted file mode 100644 index dfc59211792..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt +++ /dev/null @@ -1,17 +0,0 @@ -Adding multiple SSH public keys (optional) -In case you are connecting from different computers to the login nodes, -it is advised to use separate SSH public keys to do so. You should -follow these steps. -1. Create a new public/private SSH key pair from the new computer. - Repeat the process described in - sectionĀ Generate a public/private key pair with OpenSSH. -2. Go to -3. Upload the new SSH public key using the Add public key section. Make sure that your - public key is actually saved, because a public key will be refused - if it is too short, wrong type, or in a wrong format. -4. 
(optional) If you lost your key, you can delete the old key on the - same page. You should keep at least one valid public SSH key in your - account. -5. Take into account that it will take some time before the new SSH - public key is active in your account on the system; waiting for - 15-30 minutes should be sufficient. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json deleted file mode 100644 index d9d3c33f876..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Adding-multiple-SSH-public-keys-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Applying-for-the-account", - "links": { - "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair-with-openssh" - }, - "previous_title": "account_paragraph_10", - "next_title": "account_paragraph_12", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#adding-multiple-ssh-public-keys-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt deleted file mode 100644 index d96c80b42a2..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt +++ /dev/null @@ -1,15 +0,0 @@ -How do SSH keys work -To open a Terminal window in macOS, open the Finder and choose -*\>\> Applications \> Utilities \> Terminal* -Before requesting an account, you need to generate a pair of ssh keys. 
-One popular way to do this on is using the OpenSSH client included with , which you can then also use to log on to the clusters. -Test OpenSSH -Secure Shell (ssh) is a cryptographic network protocol for secure data -communication, remote command-line login, remote command execution, and -other secure network services between two networked computers. In short, -ssh provides a secure connection between 2 computers via insecure -channels (Network, Internet, telephone lines, ...). -"Secure" means that: -1. the User is authenticated to the System; and -2. the System is authenticated to the User; and -3. all data is encrypted during transfer. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json deleted file mode 100644 index 028d9d25f7f..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Test-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_paragraph_4", - "next_title": "account_macos_paragraph_5.2", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#test-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt deleted file mode 100644 index 318f913fba3..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt +++ /dev/null @@ -1,14 +0,0 @@ -OpenSSH is a FREE implementation of the SSH connectivity 
protocol. comes -with its own implementation of OpenSSH, so you don't need to install any -third-party software to use it. Just open a terminal window and jump in! -On all popular Linux distributions, the OpenSSH software is readily -available, and most often installed by default. You can check whether -the OpenSSH software is installed by opening a terminal and typing: -$ ssh -V -OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017 -To access the clusters and transfer your files, you will use the -following commands: -1. ssh-keygen: to generate the SSH key pair (public + private key); -2. ssh: to open a shell on a remote machine; -3. sftp: a secure equivalent of ftp; -4. scp: a secure equivalent of the remote copy command rcp. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json deleted file mode 100644 index dfec6f6fd5a..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Test-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_macos_paragraph_5.1", - "next_title": "account_macos_paragraph_5.3", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#test-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt deleted file mode 100644 index 5df90a3dd7c..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt 
+++ /dev/null @@ -1,16 +0,0 @@ -Generate a public/private key pair with OpenSSH -A key pair might already be present in the default location inside your -home directory. Therefore, we first check if a key is available with the -"list short" ("ls") command: -ls ~/.ssh -If a key-pair is already available, you would normally get: -authorized_keys id_rsa id_rsa.pub known_hosts -Otherwise, the command will show: -ls: .ssh: No such file or directory -You can recognise a public/private key pair when a pair of files has the -same name except for the extension ".pub" added to one of them. In this -particular case, the private key is "id_rsa" and public key is -"id_rsa.pub". You may have multiple keys (not necessarily in the -directory "~/.ssh") if you or your operating system requires this. Be -aware that your existing key pair might be too short, or not the right -type. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json deleted file mode 100644 index 5a10e780b45..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_macos_paragraph_5.2", - "next_title": "account_macos_paragraph_5.4", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt deleted file mode 100644 index d29d61d27d9..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt +++ /dev/null @@ -1,13 +0,0 @@ -You will need to generate a new key pair, when: -1. you don't have a key pair yet -2. you forgot the passphrase protecting your private key -3. your private key was compromised -4. your key pair is too short or not the right type -For extra security, the private key itself can be encrypted using a -"passphrase", to prevent anyone from using your private key even when -they manage to copy it. You have to "unlock" the private key by typing -the passphrase. Be sure to never give away your private key, it is -private and should stay private. You should not even copy it to one of -your other machines, instead, you should create a new public/private key -pair for each machine. -ssh-keygen -t rsa -b 4096 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json deleted file mode 100644 index 8da465c1f24..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_macos_paragraph_5.3", - "next_title": "account_macos_paragraph_5.5", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh" -} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt deleted file mode 100644 index 78c142e82e0..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt +++ /dev/null @@ -1,6 +0,0 @@ -This will ask you for a file name to store the private and public key, -and a passphrase to protect your private key. It needs to be emphasised -that you really should choose the passphrase wisely! The system will ask -you for it every time you want to use the private key that is every time -you want to access the cluster or transfer your files. -Without your key pair, you won't be able to apply for a personal VSC account. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json deleted file mode 100644 index 9d6f7b1a741..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_macos_paragraph_5.4", - "next_title": "account_paragraph_6", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt deleted 
file mode 100644 index c3b395b5296..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt +++ /dev/null @@ -1 +0,0 @@ -Using an SSH agent (optional) \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json deleted file mode 100644 index 17a34a2f80b..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_paragraph_5", - "next_title": "account_paragraph_7", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt deleted file mode 100644 index 1069ebd9fbd..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt +++ /dev/null @@ -1,14 +0,0 @@ -Using an SSH agent (optional) -Most recent Unix derivatives include by default an SSH agent -to keep and manage the user SSH keys. If you use one of these derivatives you must include the new keys into -the SSH manager keyring to be able to connect to the HPC cluster. If -not, SSH client will display an error message (see Connecting) similar to this: -Agent admitted failure to sign using the key. 
-Permission denied (publickey,gssapi-keyex,gssapi-with-mic). -This could be fixed using the ssh-add command. You can include the new -private keys' identities in your keyring with: -ssh-add - tip - Without extra options ssh-add adds any key located at $HOME/.ssh - directory, but you can specify the private key location path as - argument, as example: ssh-add /path/to/my/id_rsa. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json deleted file mode 100644 index 18b3b3675de..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/connecting" - }, - "previous_title": "account_paragraph_6", - "next_title": "account_macos_paragraph_7.2", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt deleted file mode 100644 index c880ee4a228..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt +++ /dev/null @@ -1,7 +0,0 @@ -Check that your key is available from the keyring with: -ssh-add -l -After these changes the key agent will keep your SSH key to connect to -the clusters as usual. 
- tip - You should execute ssh-add command again if you generate a new SSH - key. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json deleted file mode 100644 index 072a43cb3e4..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_macos_paragraph_7.1", - "next_title": "account_paragraph_8", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt deleted file mode 100644 index 5a5a52da062..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt +++ /dev/null @@ -1,12 +0,0 @@ -Applying for the account -After you log in using your UGent login and password, you will be asked to -upload the file that contains your public key, i.e., the file -"id_rsa.pub" which you have generated earlier. Make sure that your -public key is actually accepted for upload, because if it is in a wrong -format, wrong type or too short, then it will be refused. -This file has been stored in the directory "~/.ssh/". - tip - As ".ssh" is an invisible directory, the Finder will not show it by - default. 
The easiest way to access the folder, is by pressing ++cmd+shift+g++ (or ++cmd+shift+"."++), - which will allow you to enter the name of a directory, which you would - like to open in Finder. Here, type "~/.ssh" and press enter. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json deleted file mode 100644 index 86c8c2048bf..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Applying-for-the-account", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 2, - "directory": "account", - "parent_title": "account", - "previous_title": "account_paragraph_8", - "next_title": "account_paragraph_10", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#applying-for-the-account" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt deleted file mode 100644 index 1d912924535..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt +++ /dev/null @@ -1,37 +0,0 @@ -First Time connection to the HPC infrastructure - A locale is a set of parameters that defines the user's language, country and - any special variant preferences that the user wants to see in their user - interface. Usually a locale identifier consists of at least a language - identifier and a region identifier. - Note - If you try to set a non-supported locale, then it will be automatically - set to the default. 
Currently the default is en_US.UFT-8 or en_US, - depending on whether your originally (non-supported) locale was UTF-8 or not. - Open the .bashrc on your local machine with your favourite editor and - add the following lines: - - $ nano ~/.bashrc - ... - export LANGUAGE="en_US.UTF-8" - export LC_ALL="en_US.UTF-8" - export LC_CTYPE="en_US.UTF-8" - export LANG="en_US.UTF-8" - ... - - tip "tip: vi" - To start entering text in vi: move to the place you want to start - entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq" - To exit vi without saving your changes, enter ""ESC":q!" - - - or alternatively (if you are not comfortable with the Linux editors), - again on your local machine: - - echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile - echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile - echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile - echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile - - You can now log out, open a new terminal/shell on your local machine and - reconnect to the login node, and you should not get these warnings anymore. 
- \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json deleted file mode 100644 index 323292b910e..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "previous_title": "connecting_paragraph_10", - "next_title": "connecting_paragraph_12", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt deleted file mode 100644 index d872c89a0f8..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt +++ /dev/null @@ -1,6 +0,0 @@ -Transfer Files to/from the HPC -Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. -The preferred way to transfer files is by using an scp or sftp via the -secure OpenSSH protocol. ships with an implementation of OpenSSH, so you -don't need to install any third-party software to use it. Just open a -terminal window and jump in! 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json deleted file mode 100644 index 8a420f36c2b..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Transfer-Files-tofrom-the-HPC", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "previous_title": "connecting_paragraph_11", - "next_title": "connecting_paragraph_13", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#transfer-files-tofrom-the-hpc" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt deleted file mode 100644 index 8d0031fcca9..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt +++ /dev/null @@ -1,12 +0,0 @@ -Transfer Files tofrom the HPC -Using scp -Secure copy or SCP is a tool (command) for securely transferring files between a local -host (= your computer) and a remote host (the HPC). It is based on the -Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e., -copy) command, but can copy files to or from remote machines. -It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if -you have symlinks to them in your home directory. See -the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux - for how to do this. 
-Open an additional terminal window and check that you're working on your -local machine. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json deleted file mode 100644 index 79157005600..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "links": { - "0": "https://docs.hpc.ugent.be/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" - }, - "previous_title": "connecting_paragraph_12", - "next_title": "connecting_macos_paragraph_13.2", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt deleted file mode 100644 index f1da0677a67..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt +++ /dev/null @@ -1,17 +0,0 @@ -$ hostname - -If you're still using the terminal that is connected to the HPC, close the -connection by typing "exit" in the terminal window. -For example, we will copy the (local) file "localfile.txt" to your -home directory on the HPC cluster. We first generate a small dummy -"localfile.txt", which contains the word "Hello". Use your own VSC -account, which is something like "vsc40000". 
Don't forget the colon (:) at the -end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your -local filesystem. You can even specify where to save the file on the -remote filesystem by putting a path after the colon. -$ echo "Hello" > localfile.txt -$ ls -l -... --rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt -$ scp localfile.txt vsc40000@login.hpc.ugent.be: -localfile.txt 100% 6 0.0KB/s 00:00 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json deleted file mode 100644 index dc57de365bf..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_13.1", - "next_title": "connecting_macos_paragraph_13.3", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt deleted file mode 100644 index 9585900e356..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt +++ /dev/null @@ -1,22 +0,0 @@ -Connect to the HPC via another terminal, print the working directory (to -make sure you're in the home directory) and check whether the file has -arrived: -$ pwd -/user/home/gent/vsc400/vsc40000 -$ ls -l -total 1536 
-drwxrwxr-x 2 -drwxrwxr-x 2 -drwxrwxr-x 10 --rw-r--r-- 1 -$ cat localfile.txt -Hello -The scp command can also be used to copy files from the cluster to your -local machine. Let us copy the remote file "intro-HPC--Gent.pdf" from your "docs" -subdirectory on the cluster to your local computer. -First, we will confirm that the file is indeed in the "docs" -subdirectory. In the terminal on the login node, enter: -$ cd ~/docs -$ ls -l -total 1536 --rw-r--r-- 1 vsc40000 Sep 11 09:53 intro-HPC--Gent.pdf diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json deleted file mode 100644 index 5a4623c650d..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_13.2", - "next_title": "connecting_macos_paragraph_13.4", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt deleted file mode 100644 index d09b69552ef..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt +++ /dev/null @@ -1,14 +0,0 @@ -Now we will copy the file to the local machine. 
On the terminal on your -own local computer, enter: -$ scp vsc40000@login.hpc.ugent.be:./docs/intro-HPC--Gent.pdf . -intro-HPC--Gent.pdf 100% 725KB 724.6KB/s 00:01 -$ ls -l -total 899 --rw-r--r-- 1 user staff 741995 Sep 18 09:53 --rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt -The file has been copied from the HPC to your local computer. -It's also possible to copy entire directories (and their contents) with -the -r flag. For example, if we want to copy the local directory -dataset to $VSC_SCRATCH, we can use the following command (assuming -you've created the scratch symlink): -scp -r dataset vsc40000@login.hpc.ugent.be:scratch diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json deleted file mode 100644 index 54b3fe19d58..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_13.3", - "next_title": "connecting_macos_paragraph_13.5", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt deleted file mode 100644 index 532d57bb4a5..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt +++ /dev/null @@ -1,14 +0,0 @@ -If you 
don't use the -r option to copy a directory, you will run into -the following error: -$ scp dataset vsc40000@login.hpc.ugent.be:scratch -dataset: not a regular file -Using sftp -The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file -transfer and file management functionalities over any reliable data -stream. It was designed as an extension of the Secure Shell protocol -(SSH) version 2.0. This protocol assumes that it is run over a secure -channel, such as SSH, that the server has already authenticated the -client, and that the identity of the client user is available to the -protocol. -The sftp is an equivalent of the ftp command, with the difference that -it uses the secure ssh protocol to connect to the clusters. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json deleted file mode 100644 index 0b9ba08e3b1..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-sftp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_13.4", - "next_title": "connecting_macos_paragraph_13.6", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt deleted file mode 100644 index 
1ef13b80c6f..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt +++ /dev/null @@ -1,18 +0,0 @@ -One easy way of starting a sftp session is -sftp vsc40000@login.hpc.ugent.be -Typical and popular commands inside an sftp session are: -| | | -|:--------------------------|:-------------------------------------------------------------------------------------| -| cd ~/exmples/fibo | Move to the examples/fibo subdirectory on the (i.e., the HPC remote machine) | -| ls | Get a list of the files in the current directory on the HPC. | -| get fibo.py | Copy the file "fibo.py" from the HPC | -| get tutorial/HPC.pdf | Copy the file "HPC.pdf" from the HPC, which is in the "tutorial" subdirectory. | -| lcd test | Move to the "test" subdirectory on your local machine. | -| lcd .. | Move up one level in the local directory. | -| lls | Get local directory listing. | -| put test.py | Copy the local file test.py to the HPC. | -| put test1.py test2.py | Copy the local file test1.py to the and rename it to test2.py. | -| bye | Quit the sftp session | -| **mget *.cc** | Copy all the remote files with extension ".cc" to the local directory. | -| **mput *.h** | Copy all the local files with extension ".h" to the HPC. 
| -| | | diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json deleted file mode 100644 index 9b08fbde549..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-sftp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "links": { - "0": "https://docs.hpc.ugent.be/", - "1": "https://docs.hpc.ugent.be/" - }, - "previous_title": "connecting_macos_paragraph_13.5", - "next_title": "connecting_macos_paragraph_13.7", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt deleted file mode 100644 index 20a4acb40a8..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt +++ /dev/null @@ -1,15 +0,0 @@ -Transfer Files tofrom the HPC -Using a GUI (Cyberduck) -Cyberduck is a graphical alternative to the scp command. It can be -installed from . -This is the one-time setup you will need to do before connecting: -1. After starting Cyberduck, the Bookmark tab will show up. To add a - new bookmark, click on the "+" sign on the bottom left of the - window. A new window will open. -2. In the drop-down menu on top, select "SFTP (SSH File Transfer Protocol)". -3. In the "Server" field, type in login.hpc.ugent.be. 
In the "Username" field, type in - your VSC account id (this looks like vsc40000). -4. Select the location of your SSH private key in the "SSH Private Key" field. -5. Finally, type in a name for the bookmark in the "Nickname" field and - close the window by pressing on the red circle in the top left - corner of the window. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json deleted file mode 100644 index 694b7682aa9..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-a-GUI-(Cyberduck)", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_paragraph_13", - "next_title": "connecting_macos_paragraph_14.2", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt deleted file mode 100644 index 1d20edf411f..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt +++ /dev/null @@ -1,3 +0,0 @@ -To open the connection, click on the "Bookmarks" icon (which -resembles an open book) and double-click on the bookmark you just -created. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json deleted file mode 100644 index e32b1ab4c58..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-a-GUI-(Cyberduck)", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_14.1", - "next_title": "connecting_paragraph_15", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt deleted file mode 100644 index 1e22cfc8b1f..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt +++ /dev/null @@ -1,10 +0,0 @@ -First Time connection to the HPC infrastructure -Connect -Open up a terminal and enter the following command to connect to the HPC. -You can open a terminal by navigation to Applications and then Utilities in the finder and open Terminal.app, or enter Terminal in Spotlight Search. -ssh vsc40000@login.hpc.ugent.be -Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login -node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command. -The first time you make a connection to the login node, you will be -asked to verify the authenticity of the login node. 
Please check -Warning message when first connecting to new host on how to do this. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json deleted file mode 100644 index f928fbfcdd6..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Connect", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "links": { - "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host" - }, - "previous_title": "connecting_paragraph_4", - "next_title": "connecting_macos_paragraph_5.2", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#connect" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt deleted file mode 100644 index f3f5ac6e775..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt +++ /dev/null @@ -1,7 +0,0 @@ -A possible error message you can get if you previously saved your -private key somewhere else than the default location -($HOME/.ssh/id_rsa): -Permission denied (publickey,gssapi-keyex,gssapi-with-mic). -In this case, use the -i option for the ssh command to specify the -location of your private key. 
For example: -ssh -i /home/example/my_keys diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json deleted file mode 100644 index 047d5863361..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Connect", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "previous_title": "connecting_macos_paragraph_5.1", - "next_title": "connecting_paragraph_6", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#connect" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt deleted file mode 100644 index ca00a8a0f65..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt +++ /dev/null @@ -1,17 +0,0 @@ -Adding multiple SSH public keys (optional) -In case you are connecting from different computers to the login nodes, -it is advised to use separate SSH public keys to do so. You should -follow these steps. -1. Create a new public/private SSH key pair from Putty. Repeat the - process described in - sectionĀ Generate a public/private key pair. -2. Go to -3. Upload the new SSH public key using the Add public key section. Make sure that your - public key is actually saved, because a public key will be refused - if it is too short, wrong type, or in a wrong format. -4. 
(optional) If you lost your key, you can delete the old key on the - same page. You should keep at least one valid public SSH key in your - account. -5. Take into account that it will take some time before the new SSH - public key is active in your account on the system; waiting for - 15-30 minutes should be sufficient. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json deleted file mode 100644 index 4614c053f2c..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Adding-multiple-SSH-public-keys-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Applying-for-the-account", - "links": { - "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair" - }, - "previous_title": "account_paragraph_10", - "next_title": "account_paragraph_12", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#adding-multiple-ssh-public-keys-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt deleted file mode 100644 index 93ca7ac9da5..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt +++ /dev/null @@ -1,18 +0,0 @@ -How do SSH keys work -A typical Windows environment does not come with pre-installed software -to connect and run command-line executables on a HPC. 
Some tools need to be -installed on your Windows machine first, before we can start the actual -work. -Get PuTTY: A free telnet/SSH client -We recommend to use the PuTTY tools package, which is freely available. -You do not need to install PuTTY, you can download the PuTTY and -PuTTYgen executable and run it. This can be useful in situations where -you do not have the required permissions to install software on the -computer you are using. Alternatively, an installation package is also -available. -You can download PuTTY from the official address: -. You -probably want the 64-bits version. If you can install software on your -computer, you can use the "Package files", if not, you can download and -use putty.exe and puttygen.exe in the "Alternative binary files" -section. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json deleted file mode 100644 index e0024f40d55..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Get-PuTTY-A-free-telnetSSH-client", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_paragraph_3", - "next_title": "account_windows_paragraph_4.2", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#get-putty-a-free-telnetssh-client" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt deleted file mode 100644 index cebd1da3baf..00000000000 
--- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt +++ /dev/null @@ -1,13 +0,0 @@ -The PuTTY package consists of several components, but we'll only use -two: -1. PuTTY: the Telnet and SSH client itself (to login, see Open a terminal) -2. PuTTYgen: an RSA and DSA key generation utility (to generate a key pair, - see Generate a public/private key pair) -Generating a public/private key pair -Before requesting a VSC account, you need to generate a pair of ssh -keys. You need 2 keys, a public and a private key. You can visualise the -public key as a lock to which only you have the key (your private key). -You can send a copy of your lock to anyone without any problems, because -only you can open it, as long as you keep your private key secure. To -generate a public/private key pair, you can use the PuTTYgen key -generator. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json deleted file mode 100644 index 534ebda0a1c..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generating-a-publicprivate-key-pair", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/connecting/#open-a-terminal", - "1": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair" - }, - "previous_title": "account_windows_paragraph_4.1", - "next_title": "account_windows_paragraph_4.3", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" -} \ No newline at end of 
file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt deleted file mode 100644 index 6e65300562d..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt +++ /dev/null @@ -1,30 +0,0 @@ -Start PuTTYgen.exe it and follow these steps: -1. In "Parameters" (at the bottom of the window), choose "RSA" and set the number of - bits in the key to 4096. -2. Click on "Generate". To generate the key, you must move the mouse cursor over - the PuTTYgen window (this generates some random data that PuTTYgen - uses to generate the key pair). Once the key pair is generated, your - public key is shown in the field "Public key for pasting into OpenSSH authorized_keys file". -3. Next, it is advised to fill in the "Key comment" field to make it easier - identifiable afterwards. -4. Next, you should specify a passphrase in the "Key passphrase" field and retype it in - the "Confirm passphrase" field. Remember, the passphrase protects the private key against - unauthorised use, so it is best to choose one that is not too easy - to guess but that you can still remember. Using a passphrase is not - required, but we recommend you to use a good passphrase unless you - are certain that your computer's hard disk is encrypted with a - decent password. (If you are not sure your disk is encrypted, it - probably isn't.) -5. Save both the public and private keys in a folder on your personal - computer (We recommend to create and put them in the folder - "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh") with the - buttons "Save public key" and "Save private key". We recommend using the name "id_rsa.pub" for the public key, and - "id_rsa.ppk" for the private key. -6. 
Finally, save an "OpenSSH" version of your private key (in - particular for later "X2Go" usage, see x2go) by entering the - "Conversions" menu and selecting "Export OpenSSH key" (do not select the - "force new file format" variant). Save the file in the same location - as in the previous step with filename "id_rsa". (If there is no - "Conversions" menu, you must update your "puttygen" version. If you - want to do this conversion afterwards, you can start with loading an - existing "id_rsa.ppk" and only do this conversions export.) diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json deleted file mode 100644 index 4555638639d..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generating-a-publicprivate-key-pair", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/" - }, - "previous_title": "account_windows_paragraph_4.2", - "next_title": "account_windows_paragraph_4.4", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt deleted file mode 100644 index d0425d6738f..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt +++ /dev/null @@ -1,2 +0,0 @@ -If you use another program to 
generate a key pair, please remember that -they need to be in the OpenSSH format to access the HPC clusters. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json deleted file mode 100644 index ebd55060657..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generating-a-publicprivate-key-pair", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_windows_paragraph_4.3", - "next_title": "account_paragraph_5", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt deleted file mode 100644 index b8dba743c0a..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt +++ /dev/null @@ -1,13 +0,0 @@ -Using an SSH agent (optional) -It is possible to setup a SSH agent in Windows. This is an optional -configuration to help you to keep all your SSH keys (if you have -several) stored in the same key ring to avoid to type the SSH key -password each time. The SSH agent is also necessary to enable SSH hops -with key forwarding from Windows. -Pageant is the SSH authentication agent used in windows. This agent should be -available from the PuTTY installation package - or as -stand alone binary package. 
-After the installation just start the Pageant application in Windows, -this will start the agent in background. The agent icon will be visible -from the Windows panel. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json deleted file mode 100644 index 5fd697066b6..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_paragraph_5", - "next_title": "account_windows_paragraph_6.2", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt deleted file mode 100644 index 62ac04dd9aa..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt +++ /dev/null @@ -1,11 +0,0 @@ -At this point the agent does not contain any private key. You should -include the private key(s) generated in the previous section Generating a public/private key pair. -1. Click on "Add key" -2. Select the private key file generated in Generating a public/private key pair ("id_rsa.ppk" by default). -3. Enter the same SSH key password used to generate the key. After this - step the new key will be included in Pageant to manage the SSH - connections. -4. 
You can see the SSH key(s) available in the key ring just clicking - on "View Keys". -5. You can change PuTTY setup to use the SSH agent. Open PuTTY and check - Connection > SSH > Auth > Allow agent forwarding. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json deleted file mode 100644 index 11c69338029..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair", - "1": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair" - }, - "previous_title": "account_windows_paragraph_6.1", - "next_title": "account_windows_paragraph_6.3", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt deleted file mode 100644 index 17c94975dec..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt +++ /dev/null @@ -1,5 +0,0 @@ -Now you can connect to the login nodes as usual. The SSH agent will know -which SSH key should be used and you do not have to type the SSH -passwords each time, this task is done by Pageant agent automatically. 
-It is also possible to use WinSCP with Pageant, see - for more details. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json deleted file mode 100644 index e33d002d248..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_windows_paragraph_6.2", - "next_title": "account_paragraph_7", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt deleted file mode 100644 index 9fd23612756..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt +++ /dev/null @@ -1,8 +0,0 @@ -Applying for the account -After you log in using your UGent login and password, you will be asked to -upload the file that contains your public key, i.e., the file -"id_rsa.pub" which you have generated earlier. Make sure that your -public key is actually accepted for upload, because if it is in a wrong -format, wrong type or too short, then it will be refused. 
-This file should have been stored in the directory -"C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh" diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json deleted file mode 100644 index 87cda41283f..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Applying-for-the-account", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 2, - "directory": "account", - "parent_title": "account", - "previous_title": "account_paragraph_8", - "next_title": "account_paragraph_10", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#applying-for-the-account" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt deleted file mode 100644 index 5aa8ca03374..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt +++ /dev/null @@ -1,9 +0,0 @@ -First Time connection to the HPC infrastructure - A locale is a set of parameters that defines the user's language, country and - any special variant preferences that the user wants to see in their user - interface. Usually a locale identifier consists of at least a language - identifier and a region identifier. - Note - If you try to set a non-supported locale, then it will be automatically - set to the default. Currently the default is en_US.UFT-8 or en_US, - depending on whether your originally (non-supported) locale was UTF-8 or not. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json deleted file mode 100644 index d4b02dbc9fb..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "previous_title": "connecting_paragraph_10", - "next_title": "connecting_paragraph_12", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt deleted file mode 100644 index 67e5e454852..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt +++ /dev/null @@ -1,22 +0,0 @@ -Transfer Files to/from the HPC -Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. -WinSCP -To transfer files to and from the cluster, we recommend the use of -WinSCP, a graphical file management tool which can transfer files using -secure protocols such as SFTP and SCP. WinSCP is freely available from -. -To transfer your files using WinSCP, -1. Open the program -2. 
The "Login" menu is shown automatically (if it is closed, click "New Session" to open it again). Fill in the necessary fields under "Session" - 1. Click "New Site". - 2. Enter "login.hpc.ugent.be" in the "Host name" field. - 3. Enter your "vsc-account" in the "User name" field. - 4. Select "SCP" as the "file" protocol. - 5. Note that the password field remains empty. - - 6. Click "Advanced...". - 7. Click "SSH > Authentication". - 8. Select your private key in the field "Private key file". -3. Press the "Save" button, to save the session under "Session > Sites" for future access. -4. Finally, when clicking on "Login", you will be asked for your key passphrase. - diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json deleted file mode 100644 index a4bbaee0f59..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "WinSCP", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "previous_title": "connecting_paragraph_11", - "next_title": "connecting_windows_paragraph_12.2", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt deleted file mode 100644 index 82c71ac4129..00000000000 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt +++ /dev/null @@ -1,11 +0,0 @@ -The first time you make a connection to the login node, a Security -Alert will appear and you will be asked to verify the authenticity of the -login node. -Make sure the fingerprint in the alert matches one of the following: -- ssh-rsa 2048 10:2f:31:21:04:75:cb:ed:67:e0:d5:0c:a1:5a:f4:78 -- ssh-rsa 2048 SHA256:W8Wz0/FkkCR2ulN7+w8tNI9M0viRgFr2YlHrhKD2Dd0 -- ssh-ed25519 255 19:28:76:94:52:9d:ff:7d:fb:8b:27:b6:d7:69:42:eb -- ssh-ed25519 256 SHA256:8AJg3lPN27y6i+um7rFx3xoy42U8ZgqNe4LsEycHILA -- ssh-ecdsa 256 e6:d2:9c:d8:e7:59:45:03:4a:1f:dc:96:62:29:9c:5f -- ssh-ecdsa 256 SHA256:C8TVx0w8UjGgCQfCmEUaOPxJGNMqv2PXLyBNODe5eOQ -If it does, press Yes, if it doesn't, please contact hpc@ugent.be. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json deleted file mode 100644 index 80a8ef763a1..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "WinSCP", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "previous_title": "connecting_windows_paragraph_12.1", - "next_title": "connecting_windows_paragraph_12.3", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt deleted file mode 100644 index c0ffe6b4602..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt +++ /dev/null @@ -1,6 +0,0 @@ -Note: it is possible that the ssh-ed25519 fingerprint starts with ssh-ed25519 255 -rather than ssh-ed25519 256 (or vice versa), depending on the PuTTY version you are using. -It is safe to ignore this 255 versus 256 difference, but the part after should be -identical. -Now, try out whether you can transfer an arbitrary file from your local -machine to the HPC and back. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json deleted file mode 100644 index 07760730d56..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "WinSCP", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "previous_title": "connecting_windows_paragraph_12.2", - "next_title": "connecting_paragraph_13", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt deleted file mode 100644 index e45f4e63b85..00000000000 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt +++ /dev/null @@ -1,42 +0,0 @@ -First Time connection to the HPC infrastructure -Open a Terminal -You've generated a public/private key pair with PuTTYgen and have an -approved account on the VSC clusters. The next step is to setup the -connection to (one of) the HPC. -In the screenshots, we show the setup for user -"vsc20167" -to the HPC cluster via the login node "login.hpc.ugent.be". -1. Start the PuTTY executable putty.exe in your directory - C:\Program Files (x86)\PuTTY and the configuration screen will pop - up. As you will often use the PuTTY tool, we recommend adding a - shortcut on your desktop. -2. Within the category , in the field , enter the name of the - login node of the cluster (i.e., "login.hpc.ugent.be") you want to connect to. - -3. In the category "Connection > Data", in the field "Auto-login username", put in , which is your VSC - username that you have received by e-mail after your request was - approved. - -4. In the category "Connection > SSH > Auth", in the field "Private key file for authentication" click on "Browse" and select the private key - (i.e., "id_rsa.ppk") that you generated and saved above. -5. In the category "Connection > SSH > X11", click the "Enable X11 Forwarding" checkbox. -6. Now go back to , and fill in "hpcugent" in the "Saved Sessions" field and press "Save" to - store the session information. - -7. Now pressing "Open", will open a terminal window and asks for you - passphrase. - -8. If this is your first time connecting, you will be asked to verify - the authenticity of the login node. Please see - sectionĀ Warning message when first connecting to new host - on how to do this. -9. After entering your correct passphrase, you will be connected to the - login-node of the HPC. -10. 
To check you can now "Print the Working Directory" (pwd) and check - the name of the computer, where you have logged in (hostname): - $ pwd - /user/home/gent/vsc400/vsc40000 - $ hostname -f - gligar07.gastly.os -11. For future PuTTY sessions, just select your saved session (i.e. "hpcugent") - from the list, "Load" it and press "Open". diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json deleted file mode 100644 index d3b7d581c94..00000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Open-a-Terminal", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "links": { - "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host" - }, - "previous_title": "connecting_paragraph_3", - "next_title": "connecting_paragraph_5", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" -} \ No newline at end of file From 1ef1f10e6b05839f604fe65e2370599e580c2382 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 13:26:47 +0200 Subject: [PATCH 141/152] Changed paragraphs to decide length based on tokens instead of characters --- scripts/HPC_chatbot_preprocessor/README.md | 2 +- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 96a99498451..6cfd9be8231 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ 
b/scripts/HPC_chatbot_preprocessor/README.md @@ -36,7 +36,7 @@ Including this option will split the source files based on the titles and subtit #### `pl`/`min_paragraph_length` -This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 683 characters. This options only works if `split_on_titles` is not enabled. +This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 512 tokens. This options only works if `split_on_titles` is not enabled. #### `td`/`max_title_depth` diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index cff487f8589..7c3e63c0197 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -6,6 +6,7 @@ import os import re import shutil +import tiktoken import yaml from itertools import chain, tee, zip_longest from pathlib import Path @@ -615,7 +616,12 @@ def paragraph_long_enough(paragraph, options): :return: """ # TODO: change this into something that uses the tokenizer - return len(paragraph) >= options[MIN_PARAGRAPH_LENGTH] + encoding = tiktoken.get_encoding("cl100k_base") + token_amount = len(encoding.encode(paragraph)) + + print(token_amount) + + return token_amount >= options[MIN_PARAGRAPH_LENGTH] def write_metadata(main_title, subtitle, links, title_level, directory, source_file): @@ -1144,7 +1150,7 @@ def main(options): main_title = filename[:-3] # variable that keeps track of the directories that are used to write in at different levels - curr_dirs = [filename[:-3] for _ in range(5)] + curr_dirs = [filename[:-3] for _ in range(options[MAX_TITLE_DEPTH] + 1)] ################### 
actually parse the md file ################### @@ -1212,7 +1218,7 @@ def main(options): parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located") parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to") parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.") - parser.add_argument("-pl", "--min_paragraph_length", type=int, default=683, help="Minimum length in characters of a paragraph, only works if split on titles is disabled (default: 683)") + parser.add_argument("-pl", "--min_paragraph_length", type=int, default=512, help="Minimum length in characters of a paragraph, only works if split on titles is disabled (default: 683)") parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)") parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. 
Only works if split on titles is enabled") From 621c0a3f083966f2aaa097516767fdf2d4fdd559 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 13:27:57 +0200 Subject: [PATCH 142/152] Changed paragraphs to decide length based on tokens instead of characters --- scripts/HPC_chatbot_preprocessor/requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt index 4d27d462460..37137582aad 100644 --- a/scripts/HPC_chatbot_preprocessor/requirements.txt +++ b/scripts/HPC_chatbot_preprocessor/requirements.txt @@ -1,2 +1,4 @@ PyYAML==6.0.2 -Jinja2==3.1.4 \ No newline at end of file +Jinja2==3.1.4 +tiktoken~=0.7.0 +pathlib~=1.0.1 \ No newline at end of file From adf364d1f897e433fbc2f0fcc80b8fdeb4f22a43 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 13:28:45 +0200 Subject: [PATCH 143/152] Changed paragraphs to decide length based on tokens instead of characters --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 7c3e63c0197..24e0b287a0a 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -615,12 +615,9 @@ def paragraph_long_enough(paragraph, options): :param options: dictionary containing the options given by the user :return: """ - # TODO: change this into something that uses the tokenizer encoding = tiktoken.get_encoding("cl100k_base") token_amount = len(encoding.encode(paragraph)) - print(token_amount) - return token_amount >= options[MIN_PARAGRAPH_LENGTH] From 32b8b741c8582a98b122b230742e1be09ba8c698 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 14:45:34 +0200 Subject: [PATCH 144/152] removing unnecessary files --- scripts/HPC_chatbot_preprocessor/README.md | 196 --- 
.../chatbot_parser.py | 1236 ----------------- .../HPC_chatbot_preprocessor/requirements.txt | 4 - .../generic/tps1/tps1_paragraph_1.txt | 6 - .../tps1/tps1_paragraph_1_metadata.json | 15 - .../generic/tps1/tps1_paragraph_3.txt | 3 - .../tps1/tps1_paragraph_3_metadata.json | 12 - .../linux/tps1/tps1_linux_paragraph_2.1.txt | 4 - .../tps1_linux_paragraph_2.1_metadata.json | 15 - .../linux/tps1/tps1_linux_paragraph_2.2.txt | 3 - .../tps1_linux_paragraph_2.2_metadata.json | 12 - .../macos/tps1/tps1_macos_paragraph_2.1.txt | 4 - .../tps1_macos_paragraph_2.1_metadata.json | 15 - .../macos/tps1/tps1_macos_paragraph_2.2.txt | 3 - .../tps1_macos_paragraph_2.2_metadata.json | 12 - .../tps1/tps1_windows_paragraph_2.1.txt | 7 - .../tps1_windows_paragraph_2.1_metadata.json | 15 - .../tps1/tps1_windows_paragraph_2.2.txt | 6 - .../tps1_windows_paragraph_2.2_metadata.json | 12 - .../tests/test_files/ftps/tps1.md | 43 - .../tts1/Main-title/Subtitle-1/Subtitle-1.txt | 2 - .../Subtitle-1/Subtitle-1_metadata.json | 12 - .../Main-title/Subtitle-5-g/Subtitle-5-g.txt | 1 - .../Subtitle-5-g/Subtitle-5-g_metadata.json | 12 - .../Main-title/Subtitle-2-g/Subtitle-2-g.txt | 4 - .../Subtitle-2-g/Subtitle-2-g_metadata.json | 15 - .../Subtitle-4-l&m/Subtitle-4-l&m.txt | 3 - .../Subtitle-4-l&m_metadata.json | 15 - .../Main-title/Subtitle-2-g/Subtitle-2-g.txt | 4 - .../Subtitle-2-g/Subtitle-2-g_metadata.json | 15 - .../Subtitle-4-l&m/Subtitle-4-l&m.txt | 3 - .../Subtitle-4-l&m_metadata.json | 15 - .../Main-title/Subtitle-2-g/Subtitle-2-g.txt | 4 - .../Subtitle-2-g/Subtitle-2-g_metadata.json | 15 - .../Subtitle-3-w/Subtitle-3-w.txt | 3 - .../Subtitle-3-w/Subtitle-3-w_metadata.json | 15 - .../tests/test_files/ftts/tts1.md | 31 - .../if_mangler_1_input.md | 4 - .../if_mangler_1_output.md | 4 - .../if_mangler_2_input.md | 7 - .../if_mangler_2_output.md | 7 - .../if_mangler_3_input.md | 6 - .../if_mangler_3_output.md | 6 - .../if_mangler_4_input.md | 4 - .../if_mangler_4_output.md | 4 - 
.../if_mangler_5_input.md | 11 - .../if_mangler_5_output.md | 11 - .../if_mangler_6_input.md | 8 - .../if_mangler_6_output.md | 8 - .../if_mangler_7_input.md | 9 - .../if_mangler_7_output.md | 9 - .../if_mangler_test_files/if_mangler_input.md | 55 - .../if_mangler_output.md | 55 - .../tests/test_files/list_file/list_test.md | 15 - .../tests/test_full_script.py | 68 - .../tests/test_if_mangler.py | 32 - .../tests/test_insert_links.py | 31 - .../tests/test_links.py | 69 - .../tests/test_lists.py | 27 - .../tests/test_make_valid_title.py | 14 - .../tests/test_replace_markdown_markers.py | 46 - .../tests/test_write_metadata.py | 15 - 62 files changed, 2317 deletions(-) delete mode 100644 scripts/HPC_chatbot_preprocessor/README.md delete mode 100644 scripts/HPC_chatbot_preprocessor/chatbot_parser.py delete mode 100644 scripts/HPC_chatbot_preprocessor/requirements.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json 
delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json delete mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt delete mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md delete mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_full_script.py delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_links.py delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_lists.py delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md deleted file mode 100644 index 6cfd9be8231..00000000000 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ /dev/null @@ -1,196 +0,0 @@ -# Chatbot parser - -`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory as input for a chatbot. - -## Usage - -The script can be ran in a shell environment with the following command: - -```shell -python chatbot_parser.py -``` - -This command has the following possible options: - -```shell -chatbot_parser.py [-h] -src SOURCE -dst DESTINATION [-st] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd] -``` - -### Options - -#### `h`/`help` - -Display the help message - -#### `src`/`source` - -This is a required option that specifies the source directory of the input files for the script. This location is also used to look for jinja templates when using jinja to parse the source files (such as the `macros` directory within `vsc_user_docs/mkdocs/docs/HPC`). - -#### `dst`/`destination` - -This is a required option that specifies where the output of the script should be written. 
The script also generates extra intermediate subdirectories, so subdirectories with the following names shouldn't be present in the destination directory: `parsed_mds`, `copies` and `if_mangled_files`. If any of these pose a problem, the name of the intermediate subdirectory used for the script can be changed in the macros at the top of the script. - -#### `st`/`split_on_titles` - -Including this option will split the source files based on the titles and subtitles in the markdown text. Not including this option will split the text on paragraphs with a certain minimum length. - -#### `pl`/`min_paragraph_length` - -This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 512 tokens. This options only works if `split_on_titles` is not enabled. - -#### `td`/`max_title_depth` - -This option allows the user to configure the maximum "title depth" (the amount of `#` in front) to be used as borders between sections if `split_on_titles` is enabled. The default value is 4. - -#### `l`/`links` - -Some of the sourcefiles might contain links. Including this option will retain the links in the plaintext. If this option is not included, the links will be dropped from the plaintext. - -#### `dd`/`deep_directories` - -Including this option will make the script generate a "deep directory" where every title encountered will be made into a subdirectory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). This option only works if `split_on_titles` is enabled. - -## Generated file structure - -The generated directory structure is written as a subdirectory of `parsed_mds`. 
In `parsed_mds`, two subdirectories can be found: - -- `generic` contains the parts of the markdown sources that were non-OS-specific -- `os_specific` contains the parts of the markdown sources that were OS-specific - -Within `os_specific` a further distinction is made for each of the three possible operating systems included in the documentation. - -Both the generic and each of the three os-specific directories then contain a directory for each source file. - -If the option `deep_directories` is not enabled, all paragraphs of the source file and their corresponding metadata will be saved in this directory. The (processed) plaintext of the paragraph is written to a `.txt` file and the metadata is written to a `.json` file. - -If the option `deep_directories` is enabled, the directory of each source file will contain a subdirectory structure corresponding to the structure of the subtitles at different levels in the source file. Each subtitle in the source file corresponds to a directory nested in the directory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). - -Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and a `.json` file with the metadata of that section. - -## Requirements - -- The required Python packages are listed in `requirements.txt` - -## Restrictions on source-files - -Due to the nature of the script, some restrictions should be taken into account about the markdown files it can use as input. - -### Nested if structures - -The script uses the if-structures in the source-files to split the documentation into general documentation and os-specific documentation. As such it needs to keep track of which types of if-structures (os-related/non-os-related) it is reading from. When using certain nested if-structures, this will cause problems. 
The supported nested if-structures are determined by the macros `NON_OS_IF`, `NON_OS_IF_IN_OS_IF`, `OS_IF` and `OS_IF_IN_OS_IF`. So respectively a non-os-related if-structure, a non-os-related if nested in an os-related one, an os-related if-structure and an os-related if-structure nested in another os-related if-structure. All of these are allowed to be nested in an undetermined amount of non-os-related if-structures, but no non-os-related if structures should be nested in them. It is also not allowed to nest any of the allowed structures in more os-related if-structures. - -#### Examples of valid and invalid if-structures - -##### Allowed - -###### non-os-related in os-related - -This is an example of one of the basic allowed if-structures (`NON_OS_IF_IN_OS_IF`) - -``` -if OS == windows: - if site == Gent: - ... - endif -endif -``` - -###### os-related in os-related in non-os-related - -This is an example of the basic allowed if-structure `OS_IF_IN_OS_IF` nested in a non-os-specific if. - -``` -if site == Gent: - if OS == windows: - ... - else: - if OS == Linux: - ... - endif - endif -endif -``` - -##### Not allowed - -###### non-os-related in os-related in os-related - -This is an example of a non-os-related if-structure nested in one of the basic allowed if-structures (`OS_IF_IN_OS_IF`). - -``` -if OS != windows: - if OS == Linux: - if site == Gent: - ... - endif - endif -endif -``` - -This will result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it. - -###### os-related in non-os-related in os-related - -This is an example of the basic allowed if-structure `OS_IF` (indirectly) nested in an os-specific if-structure. - -``` -if OS != windows: - if site == Gent: - if OS == Linux: - ... - endif - endif -endif -``` - -This will also result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it. 
- -### Non OS-related if-statements - -Due to the way jinja parses the sourcefiles, the script slightly alters non os-specific if-statements as well. It expects if-statements of the following form: - -``` -{%- if site == gent %} -{% if site != (gent or brussel) %} -``` - -All spaces and the dash are optional. City names don't need to be fully lowercase since the parser will capitalize them properly anyway. - -### html syntax - -The input shouldn't contain any html syntax. While some failsafes are in place, the script isn't made with the use case of handling html syntax in mind. - -### Comments - -Any comments within the markdown files (for example TODO's) should follow the following syntax: - -``` - -``` - and should be limited to one line. - -Comments can be written in such a way that the script will keep them as input for the bot. To do that, the marker `INPUT_FOR_BOT` should be put in front of the content of the comment as such. - -``` - -``` - -This will be reworked to - -``` -your comment for the bot -``` - -in the final output. - -### Long filepaths - -Due to the nature of this script, it can generate large directories with very long names if `deep_directories` is enabled. Depending on the operating system, this can cause problems with filepaths being to long, resulting in files not being able to open. A possible fix for this is to make sure the filepath to where the script is located is not too long. Another solution is lowering the `max_title_depth` or disabling `deep_directories`. - -### Markdown lists - -The parser is made in a way to detect lists and not split them in multiple paragraphs. The kinds of lists it can detect is all lists with denominators `-`, `+`, `*` and list indexed with numbers or letters (one letter per list entry). It can handle list entries being spread out over multiple lines if there is an indentation of at least two spaces. It can also handle multiple paragraph list entries in this way, as long as the indentation stays. 
- -### Links - -Part of the metadata of the parser are links. In order for the links to be built up in the right way, links to external sites should always start with either `https://` or `http://`. diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py deleted file mode 100644 index 24e0b287a0a..00000000000 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ /dev/null @@ -1,1236 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import copy -import json -import os -import re -import shutil -import tiktoken -import yaml -from itertools import chain, tee, zip_longest -from pathlib import Path -from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template - -#################### define macro's #################### -# options -SOURCE_DIRECTORY = "SOURCE_DIRECTORY" -DESTINATION_DIRECTORY = "DESTINATION_DIRECTORY" -SPLIT_ON_TITLES = "SPLIT_ON_TITLES" -MIN_PARAGRAPH_LENGTH = "MIN_PARAGRAPH_LENGTH" -MAX_TITLE_DEPTH = "MAX_TITLE_DEPTH" -INCLUDE_LINKS_IN_PLAINTEXT = "INCLUDE_LINKS_IN_PLAINTEXT" -SPLIT_ON_PARAGRAPHS = "SPLIT_ON_PARAGRAPHS" -DEEP_DIRECTORIES = "DEEP_DIRECTORIES" -VERBOSE = "VERBOSE" - -# directories -PARSED_MDS = "parsed_mds" -COPIES = "copies" -IF_MANGLED_FILES = "if_mangled_files" -LINUX_TUTORIAL = "linux-tutorial" -RETURN_DIR = ".." 
-MKDOCS_DIR = "mkdocs" -DOCS_DIR = "docs" -HPC_DIR = "HPC" -EXTRA_DIR = "extra" -GENERIC_DIR = "generic" -OS_SPECIFIC_DIR = "os_specific" -MACROS = "macros" - -# OSes -LINUX = "linux" -WINDOWS = "windows" -MACOS = "macos" -GENERIC = "generic" -LINK_OS = {LINUX: "Linux", WINDOWS: "Windows", MACOS: "macOS"} # OS needs different capitalisation for use in links - -# urls -REPO_URL = 'https://github.com/hpcugent/vsc_user_docs' -DOCS_URL = "https://docs.hpc.ugent.be" - -# OS-related if-states -ACTIVE = "active" -INACTIVE = "inactive" - -# if mangler states -NON_OS_IF = 0 -NON_OS_IF_IN_OS_IF = 1 -OS_IF = 2 -OS_IF_IN_OS_IF = 3 - -# if mangler macros -IF_MANGLED_PART = "-if-" - -# actions -DONE = "done" -WRITE_TEXT = "write_text" -CHECK_EXTRA_MESSAGE = "check_extra_message" -WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE = "write_text_and_check_extra_message" - -# Metadata attributes -SOURCE_FILE = "source_file" -MAIN_TITLE = "main_title" -SUBTITLE = "subtitle" -TITLE_DEPTH = "title_depth" -DIRECTORY = "directory" -LINKS = "links" -PARENT_TITLE = "parent_title" -PREVIOUS_SUBTITLE = "previous_title" -NEXT_SUBTITLE = "next_title" -METADATA_OS = "OS" -REFERENCE_LINK = "reference_link" - -# if-structure components -IF = "if" -ELSE = "else" -ENDIF = "endif" - -# link indicator -LINK_MARKER = r'Ā§linkĀ§linkĀ§' - -# HTML tags -HTML_TAGS = ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"] # make sure these are always lowercase - -# regex patterns -IF_MANGLED_PATTERNS = { - IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})', - ELSE: r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})', - ENDIF: r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})' - } - -# filenames (and parts of filenames) -TEMP_JINJA_FILE = "jinja_file.txt" -_PARAGRAPH_ = "_paragraph_" -METADATA_EXTENSION = "_metadata" - -# Marker for comments for the bot -INPUT_FOR_BOT = "INPUT_FOR_BOT: " - 
-# Standard strings for verbose output -LINE = "------------------------------------------------------------------------------------------------------\n" - - -################### define functions ################### - -def check_for_title(line, in_code_block, curr_dirs, options): - """ - function that checks for titles in the current line. Used by split_text to split the text among the subtitles - - :param line: the current line to be checked for a title - :param in_code_block: boolean indicating whether the current line is part of a codeblock to be sure comments aren't counted as titles - :param curr_dirs: the current working directories for each level of subtitle, to be updated when a new title is found - :param options: dictionary containing the options given by the user - :return title_length: The amount of hashtags in front of the title on the current line - """ - # detect titles - match = re.match(r'^#+ ', line) - if match and len(match.group(0)) <= options[MAX_TITLE_DEPTH] + 1 and not in_code_block: - title_length = len(match.group(0)) - 1 - if options[DEEP_DIRECTORIES]: - curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 1:-1].replace(' ', '-'))) - - # update the higher order current directories - for i in range(title_length + 1, options[MAX_TITLE_DEPTH] + 1): - curr_dirs[i] = curr_dirs[title_length] - - return title_length - else: - return 0 - - -def make_valid_link(link, main_title, is_linux_tutorial): - """ - Function that converts a string to a valid link to be used in the metadata - - :param link: the input string to be turned into a valid link - :param main_title: the main title of the file that contains the link - :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial - :return link: the valid link - """ - - # ugly fix for problem with links - linux_tutorial_files = ["beyond_the_basics", "common_pitfalls", "getting_started", "hpc_infrastructure", 
"index", "manipulating_files_and_directories", "navigating", "uploading_files"] - if is_linux_tutorial and any([linux_tutorial_files[i] in link for i in range(len(linux_tutorial_files))]): - linux_part = LINUX_TUTORIAL + '/' - else: - linux_part = "" - - if link.startswith('http://') or link.startswith('https://') or link.startswith('mailto:'): - pass - else: - if link.startswith("./"): - link = link.replace('./', '') - elif link.startswith("../"): - link = link.replace('../', '') - - if link.startswith("#"): - link = DOCS_URL + '/' + linux_part + main_title + "/" + link - elif link.endswith(".md") and ("/" not in link or "." not in link.split("/")[0]): - link = DOCS_URL + '/' + linux_part + link.replace(".md", "") - elif '.md#' in link: - link = DOCS_URL + '/' + linux_part + link.replace(".md", "/") - else: - link = DOCS_URL + '/' + linux_part + link - - link = link.replace('index/', '').replace('/index', '') - - return link - - -def replace_markdown_markers(curr_line, linklist, in_code_block, main_title, is_linux_tutorial): - """ - function that replaces certain markdown structures with the equivalent used on the website - - :param curr_line: the current line on which markdown structures need to be replaced - :param linklist: the list used to store links that need to be printed at the end of the file - :param in_code_block: boolean indicating whether the current line is part of a code block - :param main_title: the main title of the file that is being processed - :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial - :return curr_line: the adapted current line - :return linklist: the updated linklist - """ - - # replace images with an empty line - if re.search(r'(?i)!\[image]\(.*?\)', curr_line) or re.search(r'!\[.*?]\(img/.*?\.png\)', curr_line): - curr_line = "" - - # replace links with a reference - matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) - if matches: - for match in matches: - curr_line = 
curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_MARKER + str(len(linklist)) + LINK_MARKER) - - linklist.append(make_valid_link(match[1], main_title, is_linux_tutorial)) - - # codeblock (with ``` -> always stands on a separate line, so line can be dropped) - if '```' in curr_line: - curr_line = "" - - # structures within <> - match = re.findall(r'<(.*?)>', curr_line) - if match: - for i, content in enumerate(match): - html_tags_variations = list(chain.from_iterable([[element, element + "/", "/" + element] for element in HTML_TAGS])) - html_tags_style = [element + " style=.*" for element in HTML_TAGS] - - # add references for every link of format
- if re.search(r'a href=.*', content): - link = content[7:] - curr_line = re.sub(f'<{content}>', LINK_MARKER + str(len(linklist)) + LINK_MARKER, curr_line) - linklist.append(link) - - # drop the syntax words - elif content.lower() in html_tags_variations: - curr_line = re.sub(f'<{content}>', "", curr_line) - - # drop the version of the HTML_TAGS followed by " style=" - elif any(re.match(pattern, content) for pattern in html_tags_style): - curr_line = re.sub(r'<.*?>', "", curr_line) - - # keep comments for bot - elif re.fullmatch(r'!--' + INPUT_FOR_BOT + r'.*?--', content): - curr_line = re.sub(r'', lambda m: m.group(1), curr_line) - - # drop comments - elif re.fullmatch(r'!--.*?--', content): - curr_line = re.sub(r'<.*?>', "", curr_line) - - # drop the <> around links - elif re.match(r'http://', content) or re.match(r'https://', content): - curr_line = re.sub(r'<' + content + '>', content, curr_line ) - - # keep the rest - else: - pass - - # structures with !!! (info, tips, warnings) - if '!!!' in curr_line: - curr_line = re.sub(r'!!!', "", curr_line) - - # structures with ??? (collapsable admonitions) - if '???' in curr_line: - curr_line = re.sub(r'\?\?\?', "", curr_line) - - # get rid of other indicators (`, *, +, _) - if not in_code_block: - - backquotes = re.findall(r'`(.*?)`', curr_line) - if backquotes: - for i, content in enumerate(backquotes): - curr_line = curr_line.replace(f"`{content}`", content) - - asterisks = re.findall(r'(?' 
in line) ^ ('' in line)): - in_code_block = not in_code_block - if options[VERBOSE]: - if in_code_block: - print("Detected start of a codeblock, not registering titles") - else: - print("Detected end of codeblock, registering titles again") - - # only split up if current line is in a fully non-os-specific section - if in_if_statement == 0: - - title_level = check_for_title(line, in_code_block, curr_dirs, options) - - # line is a title with a maximum depth of 4 - if title_level > 0: - if after_first_title: - - # write text of previous file - if previous_contained_if: - paragraphs_os_text[title] = current_paragraph - if options[VERBOSE]: - print("Saved os-specific chunk with temporary title: " + title + "\n") - else: - paragraphs_os_free_text[title] = current_paragraph - if options[VERBOSE]: - print("Saved generic chunk with title: " + title + "\n") - - # write metadata of previous file - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir, options[SOURCE_DIRECTORY] + '/' + main_title + '.md') - - # make a new title - title = make_valid_title(line[title_level + 1:-1]) - - # create an entry for the file in the paragraphs text dictionary - current_paragraph = "" - - after_first_title = True - subtitle_order.append(title) - - # reset link_list - link_list = [] - - previous_contained_if = False - - # line is not a title - elif after_first_title: - line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) - if line != "\n": - current_paragraph += line - - # keep track of title level and directory to write to metadata upon discovering a new subtitle - if title_level > 0: - last_title_level = title_level - last_dir = curr_dirs[last_title_level] - else: - previous_contained_if = True - line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) - if line != "\n": - current_paragraph += line - - # write dictionaries for the last file - 
if previous_contained_if: - paragraphs_os_text[title] = current_paragraph - if options[VERBOSE]: - print("Saved os-specific chunk with temporary title: " + title + "\n") - else: - paragraphs_os_free_text[title] = current_paragraph - if options[VERBOSE]: - print("Saved generic chunk with title: " + title + "\n") - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, curr_dirs[last_title_level], options[SOURCE_DIRECTORY] + '/' + main_title + '.md') - - return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order - - -def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_paragraph_number=-1, OS=GENERIC): - """ - Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata - - :param file: the filepath of the file to be split - :param main_title: the main title of the file - :param options: dictionary containing the options given by the user - :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial - :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph - :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph - :return paragraphs_text: dictionary containing the split sections of text - :return paragraphs_metadata: dictionary containing the metadata of each split section of text - :return subtitle_order: list containing all encountered subtitles in order of appearance - """ - - if options[VERBOSE]: - print("Splitting on paragraphs\n") - - # start of assuming we are not in a code_block - in_code_block = False - - # define initial dictionaries - paragraphs_os_free_text = {} - paragraphs_os_text = {} - paragraphs_metadata = {} - - # variable to keep track of the current paragraph - current_paragraph = "" - - # list to keep track of 
links in the text - link_list = [] - - # list to keep track of the order of the subtitles - subtitle_order = [] - - # variable to keep track of how many if-statements deep the current line is - in_if_statement = 0 - - # variable to indicate that previous section was one with if-statements - previous_contained_if = False - - # variable to indicate that the previous line was part of a list - in_list = False - - # paragraph number to add to title - paragraph_number = 1 - - # metadata title - metadata_title = main_title - - # define metadata data if split occurs on paragraphs and last_title and title_level are known (will be replaced later on in the process) - if current_paragraph_number != -1: - last_title_level = 4 - last_dir = "PLACEHOLDER" - - # list to keep track of most recent directories on each title level - curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)] - - with open(file, 'r') as readfile: - - # Create two independent iterators from the original file iterator (needed to check for lists) - current_line, next_line = tee(readfile) - - # Advance the next_line iterator by one step, so it is always one step ahead - next(next_line, None) - - # Process the lines - for line, nxt in zip_longest(current_line, next_line, fillvalue=""): - - # detect if-statements starting or ending on the current line - in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len( - re.findall(IF_MANGLED_PATTERNS[ENDIF], line)) - - # detect whether the current line is in a list - if re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', line): # beginning of a list entry - in_list = True - if options[VERBOSE]: - print("First line of new list entry found, not starting new paragraphs: " + line[:-1]) - elif re.search(r'^\s{2,}.+$', line) and in_list: # middle of a list entry - pass - elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list: # line(s) between list entries - pass - elif 
re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', nxt): - in_list = True - elif in_list: - if options[VERBOSE]: - print("List ended, starting new paragraphs again") - in_list = False - else: - in_list = False - - # detect codeblocks to make sure titles aren't detected in them - if '```' in line or (('
' in line) ^ ('
' in line)): - in_code_block = not in_code_block - if options[VERBOSE]: - if in_code_block: - print("Detected start of a codeblock, not starting new paragraphs") - else: - print("Detected end of codeblock, starting new paragraphs again") - - # only split up if current line is in a fully non-os-specific section - if in_if_statement == 0: - - title_level = check_for_title(line, in_code_block, curr_dirs, options) - - # check whether a new paragraph should be started - if line == "\n" and paragraph_long_enough(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph), options) and not in_code_block and not in_list: - - # create a title for the previous paragraph - if current_paragraph_number == -1: - paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number) - else: - paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number) - paragraph_number += 1 - - # write text of previous file - if previous_contained_if: - paragraphs_os_text[paragraph_title] = current_paragraph - if options[VERBOSE]: - print("Saved os-specific chunk with temporary title: " + paragraph_title + "\n") - else: - paragraphs_os_free_text[paragraph_title] = current_paragraph - if options[VERBOSE]: - print("Saved generic chunk with title: " + paragraph_title + "\n") - - # write metadata of previous file - paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir, source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md') - subtitle_order.append(paragraph_title) - - # reset the current paragraph - current_paragraph = "" - - # reset link_list - link_list = [] - - previous_contained_if = False - - # line is a title with a maximum depth of 4 - elif title_level > 0: - - # make a new title - metadata_title = make_valid_title(line[title_level + 1:-1]) - - line, link_list = replace_markdown_markers(line[title_level + 1:], link_list, in_code_block, 
main_title, is_linux_tutorial) - current_paragraph += line - - # line is not a title or the beginning of a new paragraph - elif line != "\n" or previous_contained_if: - line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) - current_paragraph += line - - # keep track of title level and directory to write to metadata upon discovering a new subtitle - if title_level > 0: - last_title_level = title_level - last_dir = curr_dirs[last_title_level] - else: - previous_contained_if = True - line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) - current_paragraph += line - - # create a title for the last paragraph - if current_paragraph_number == -1: - paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number) - else: - paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number) - - # write dictionaries for the last file - if previous_contained_if: - paragraphs_os_text[paragraph_title] = current_paragraph - if options[VERBOSE]: - print("Saved os-specific chunk with temporary title: " + paragraph_title + "\n") - else: - paragraphs_os_free_text[paragraph_title] = current_paragraph - if options[VERBOSE]: - print("Saved generic chunk with title: " + paragraph_title + "\n") - paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level], source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md') - subtitle_order.append(paragraph_title) - - return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order - - -def paragraph_long_enough(paragraph, options): - """ - Function that checks if the paragraph is long enough to be split of - - :param paragraph: current paragraph - :param options: dictionary containing the options given by the user - :return: - """ - encoding = tiktoken.get_encoding("cl100k_base") - 
token_amount = len(encoding.encode(paragraph)) - - return token_amount >= options[MIN_PARAGRAPH_LENGTH] - - -def write_metadata(main_title, subtitle, links, title_level, directory, source_file): - """ - Function that writes metadata about a text section to a dictionary - - :param main_title: The main title of the file containing the section - :param subtitle: the title of the section - :param links: a list of links contained within the section - :param title_level: the depth of the title of the section - :param directory: the directory where the section will eventually be written (can either be generic or os-specific) - :param source_file: the source file that the section originates from - :return paragraph_metadata: dictionary containing the metadata about the section - """ - - paragraph_metadata = {MAIN_TITLE: main_title, SUBTITLE: subtitle, SOURCE_FILE: source_file, TITLE_DEPTH: title_level, DIRECTORY: directory} - - if len(links) > 0: - paragraph_metadata[LINKS] = {} - for i, link in enumerate(links): - paragraph_metadata[LINKS][str(i)] = link - - paragraph_metadata[PARENT_TITLE] = Path(directory).parent.name - - return paragraph_metadata - - -def jinja_parser(filename, copy_location, options): - """ - function that let's jinja do its thing to format the files except for the os-related if-statements - - :param filename: the name of the file that needs to be formatted using jinja - :param copy_location: the location of the file that needs to be formatted using jinja - :param options: dictionary containing the options given by the user - :return: - """ - # YAML file location - yml_file_path = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, EXTRA_DIR, 'gent.yml') - - if options[VERBOSE]: - print("Reading YAML file from location: " + yml_file_path) - - # Read the YAML file - with open(yml_file_path, 'r') as yml_file: - words_dict = yaml.safe_load(yml_file) - - # ugly fix for index.md error that occurs because of the macro "config.repo_url" in 
mkdocs/docs/HPC/index.md - additional_context = { - 'config': { - 'repo_url': REPO_URL - } - } - combined_context = {**words_dict, **additional_context} - - if options[VERBOSE]: - print("Mangling OS-specific if-statements") - - # Mangle the OS-related if-statements - mangle_ifs(copy_location, filename, options) - - if options[VERBOSE]: - print("Altering other if-statements to parse properly") - - # Alter the other if-statements - alter_ifs(filename, options) - - # Use Jinja2 to replace the macros - template_loader = ChoiceLoader([FileSystemLoader(searchpath=[os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), options[SOURCE_DIRECTORY], os.path.join(options[SOURCE_DIRECTORY], RETURN_DIR)]), FunctionLoader(load_macros)]) - templateEnv = Environment(loader=template_loader) - template = templateEnv.get_template(filename) - rendered_content = template.render(combined_context) - - if options[VERBOSE]: - print("jinja parsing finished\nWriting jinja-parsed file to location: " + copy_location) - - # Save the rendered content to a new file - with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file: - output_file.write(rendered_content) - - -def load_macros(name): - """ - function used by the jinja FunctionLoader to retrieve templates from the macros folder since the normal FileSystemLoader can't locate them properly - - :param name: name of the package - :return: - """ - - macros_location = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, MACROS) - - if "../" + MACROS + "/" in name: - package_name = name.split("../" + MACROS + "/")[1] - file_location = os.path.join(macros_location, package_name) - - with open(file_location, 'r') as readfile: - return readfile.read() - - -def mangle_os_ifs(line, is_os, options): - """ - function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure. 
- We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements. - - :param line: the current line to check for os-related if-statements - :param is_os: variable keep track of the current os-state of the if-statements. Can be NON_OS_IF, NON_OS_IF_IN_OS_IF, OS_IF or OS_IF_IN_OS_IF - NON_OS_IF: not in an os-if - NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if - OS_IF: in an os-if - OS_IF_IN_OS_IF: in an os-if nested in an os-if - :param options: dictionary containing the options given by the user - :return line: the modified line with mangled os-related if-statements - """ - - match = re.search(r'\{%(.*?)%}(.*)', line) - - start_index = 0 - added_length = 0 - - while match: - - constr_match = re.search(r'\{%.*?%}', match.string) - if_match = re.search(r'if ', match.group(1)) - if_os_match = re.search(r'if OS', match.group(1)) - endif_match = re.search(r'endif', match.group(1)) - else_match = re.search(r'else', match.group(1)) - - # mangle positions - pos_first_mangle = constr_match.start() + start_index + added_length + 1 - pos_second_mangle = constr_match.end() + start_index + added_length - 1 - - # different parts of the original string - part_before_mangling = line[:pos_first_mangle] - part_between_mangling = line[pos_first_mangle:pos_second_mangle] - part_after_mangling = line[pos_second_mangle:] - - # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these - if endif_match: - if is_os in (OS_IF, OS_IF_IN_OS_IF): - if options[VERBOSE]: - print("OS-specific endif statement found in line: " + line[:-1]) - line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling - added_length += 2 * len(IF_MANGLED_PART) - if is_os == OS_IF: - is_os = NON_OS_IF - elif is_os == 
OS_IF_IN_OS_IF: - is_os = OS_IF - elif is_os == NON_OS_IF_IN_OS_IF: - is_os = OS_IF - - elif if_match: - if if_os_match: - if options[VERBOSE]: - print("OS-specific if statement found in line: " + line[:-1]) - line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling - added_length += 2 * len(IF_MANGLED_PART) - if is_os == OS_IF: - is_os = OS_IF_IN_OS_IF - else: - is_os = OS_IF - else: - if is_os == OS_IF: - is_os = NON_OS_IF_IN_OS_IF - else: - is_os = NON_OS_IF - - elif else_match: - if is_os in (OS_IF, OS_IF_IN_OS_IF): - if options[VERBOSE]: - print("OS-specific else statement found in line: " + line[:-1]) - line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling - added_length += 2 * len(IF_MANGLED_PART) - - start_index += constr_match.end() - match = re.search(r'\{%(.*?)%}(.*)', match.group(2)) - return line, is_os - - -def mangle_ifs(directory, filename, options): - """ - function that writes the if-mangled version of a file to a location where the jinja parser will use it - - :param directory: the directory of the file to be if mangled - :param filename: the filename of the file to be mangled - :param options: dictionary containing the options given by the user - :return: - """ - # variable to keep track of latest if-statement scope - is_os = NON_OS_IF - - with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES, filename), 'w') as write_file: - with open(directory, 'r') as read_file: - for line in read_file: - new_line, is_os = mangle_os_ifs(line, is_os, options) - write_file.write(new_line) - - -def alter_ifs(filename, options): - """ - Function that further adapts the if-statements in a file and writes it to a location where the jinja parser will use it. - This is because the jinja parser doesn't seem to be able to handle statements like {% site == gent %} with context {'site': 'Gent'} in this case. 
- These statements get changed to {% site == 'Gent' %} in this function. - - :param filename: the filename of the file to be transformed - :param options: dictionary containing the options given by the user - :return: - """ - - with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES, filename), 'r') as read_file: - content = read_file.read() - - pattern = r'(\{%-?\s?[a-zA-Z\s]*?[!=]=\s?\(?)([a-zA-Z\s]+(?:\sor\s[a-zA-Z\s]+)*)(\)?\s?%})' - content = re.sub(pattern, - lambda match: (f"{match.group(1)}" + - " or ".join([f"'{city.strip().capitalize()}'" for city in match.group(2).split(" or ")]) + - f"{match.group(3)}" - ), - content) - - with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES, filename), 'w') as write_file: - write_file.write(content) - - -def make_valid_title(title): - """ - function that makes sure all titles can be used as valid filenames - - :param title: the string that will be used as title and filename - :return valid_filename: the adapted title that can be used as filename - """ - # Define a regex pattern for invalid characters on both Windows and Linux - invalid_chars = r'[<>:"/\\|?*\0]' - - # get rid of extra information between {} brackets - title = re.sub(r'\{.*?}', '', title) - - # Remove invalid characters - valid_filename = re.sub(invalid_chars, '', title) - - # Strip leading/trailing whitespace - valid_filename = valid_filename.strip().strip('-').replace(' ', '-').replace("--", "-") - - return valid_filename - - -def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, options, is_linux_tutorial): - """ - Function that writes text and metadata of a generic (non-os-specific) file - - :param title: title of section - :param paragraphs_text: dictionary containing all paragraphs of text - :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text - :param title_order: list containing all subtitles in order - :param title_order_number: 
order number of the title of the section that is being written - :param options: dictionary containing the options given by the user - :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial - :return: - """ - - if len(paragraphs_text[title]) > 0: - # make the directory needed for the files that will be written - filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) - os.makedirs(filepath, exist_ok=True) - - if options[VERBOSE]: - print("Writing generic section " + title + " to filepath: " + str(filepath)) - - write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options, is_linux_tutorial) - else: - # don't write empty files - pass - - -def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, options, is_linux_tutorial): - """ - Function to write files to a certain filepath - - :param title: title of the section to be written - :param text: section of text to be written - :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text - :param title_order: list containing all subtitles in order - :param title_order_number: order number of the title of the section that is being written - :param filepath: filepath to write files to - :param OS: OS to be included in the metadata - :param options: dictionary containing the options given by the user - :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial - :return: - """ - - metadata = copy.deepcopy(paragraphs_metadata[title]) - - file_title = title - - # write text file - with open(os.path.join(filepath, file_title + ".txt"), 'w') as writefile: - if LINKS in paragraphs_metadata[title].keys(): - adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS], options) - writefile.write(adapted_text) - else: - writefile.write(text) - - # 
write metadata - # check if links in metadata is not empty - if LINKS in metadata.keys() and len(metadata[LINKS].keys()) == 0: - del metadata[LINKS] - - # add previous subtitle - if title_order_number != 0: - metadata[PREVIOUS_SUBTITLE] = title_order[title_order_number - 1] - else: - metadata[PREVIOUS_SUBTITLE] = None - - # add next subtitle - if title_order_number != len(title_order) - 1: - metadata[NEXT_SUBTITLE] = title_order[title_order_number + 1] - else: - metadata[NEXT_SUBTITLE] = None - - # add OS - metadata[METADATA_OS] = OS - - # add reference link - if is_linux_tutorial: - linux_part = LINUX_TUTORIAL + "/" - else: - linux_part = "" - if OS == GENERIC: - os_part = "" - else: - os_part = LINK_OS[OS] + "/" - if "index" not in paragraphs_metadata[title][MAIN_TITLE]: - metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in paragraphs_metadata[title][SUBTITLE] if char.isalnum() or char == '-').strip('-') - else: - metadata[REFERENCE_LINK] = DOCS_URL - - # write metadata to file - with open(os.path.join(filepath, file_title + METADATA_EXTENSION + ".json"), 'w') as writefile: - json.dump(metadata, writefile, indent=4) - - -def insert_links(text, links, options): - """ - Function that inserts links in the plaintext or takes out the references to the links depending on the value of INCLUDE_LINKS_IN_PLAINTEXT - - :param text: The plaintext that needs to be adapted - :param links: The links that might need to be inserted - :param options: dictionary containing the options given by the user - :return text: The adapted plaintext - :return links: The links that were actually present in the text - """ - - present_links = [] - new_links = {} - for link_number in re.finditer(LINK_MARKER + r'([0-9]*?)' + LINK_MARKER, text): - present_links.append(link_number.group(1)) - if options[INCLUDE_LINKS_IN_PLAINTEXT]: - text = re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, " " + 
links[link_number.group(1)] + " ", text) - else: - text = re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, "", text) - - for link_number in links.keys(): - if link_number in present_links: - new_links[str(len(new_links.keys()))] = links[link_number] - - return text, new_links - - -def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata, options, is_linux_tutorial): - """ - Function that splits os-specific sections into subtitles, parses them using jinja and writes them away - - :param text: full os specific section - :param metadata: metadata generated for the full os specific section - :param subtitle_order: order of the subtitles generated by the splitter - :param title_order_number: order number of the section - :param all_metadata: all metadata generated by the splitter - :param options: dictionary containing the options given by the user - :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial - :return: - """ - - # Unmangle if's to use jinja parser - text = re.sub(IF_MANGLED_PART, "", text) - - for OS in [LINUX, WINDOWS, MACOS]: - - # slightly alter if-statements to be able to use predefined macros - text = re.sub(OS, '"' + OS + '"', text) - - # Use jinja to render a different version of the text for each OS - template = Template(text) - jinja_text = template.render(OS=OS) - - if len(jinja_text) != 0: - - # add first subtitle in front of section again - if options[SPLIT_ON_TITLES] or metadata[SUBTITLE] not in make_valid_title(jinja_text[:len(metadata[SUBTITLE]) + 1]): - jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE].replace("-", " ") + "\n" + jinja_text - else: - jinja_text = "#" * metadata[TITLE_DEPTH] + " " + jinja_text - - # re-adjust text to correct overcorrections - jinja_text = re.sub('"' + OS + '"', OS, jinja_text) - - with open(TEMP_JINJA_FILE, 'w') as writefile: - writefile.write(jinja_text) - - # split in right way - _, 
os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, is_linux_tutorial, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) - - # prepare variables to fix metadata - total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:] - all_metadata.update(os_specific_metadata) - - # write to files - for os_i, os_subtitle in enumerate(os_subtitle_order): - # check that file actually has some content - if len(os_specific_text[os_subtitle]) > 0: - # add the links to the metadata - if LINKS in metadata.keys(): - os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS] - - # fix parent in the metadata - parent_i = 0 - parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1 - parent = os_specific_metadata[os_subtitle][MAIN_TITLE] - - while total_subtitle_order[parent_i] != os_subtitle and parent_i != len(total_subtitle_order): - if all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth: - parent = total_subtitle_order[parent_i] - parent_i += 1 - - if options[SPLIT_ON_PARAGRAPHS] and parent != os_specific_metadata[os_subtitle][MAIN_TITLE]: - os_specific_metadata[os_subtitle][PARENT_TITLE] = all_metadata[parent][SUBTITLE] - else: - os_specific_metadata[os_subtitle][PARENT_TITLE] = parent - - # fix directory in the metadata if needed - if options[DEEP_DIRECTORIES]: - if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]: - os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE]) - else: - os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE]) - - # make a directory to save the files - filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) - os.makedirs(filepath, exist_ok=True) - 
- if options[VERBOSE]: - print("Writing os-specific section " + os_subtitle + " to filepath: " + str(filepath)) - - # write to files - write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options, is_linux_tutorial) - else: - # don't write empty files - pass - else: - # don't split empty texts - pass - - -def main(options): - """ - main function - - :param options: dictionary containing the options specified by the user to run the script: - {SOURCE_DIRECTORY: The source directory where the original files are located, - DESTINATION_DIRECTORY: The destination directory where the processed files should be written to, - SPLIT_ON_TITLES: boolean indicating whether to split on titles, - SPLIT_ON_PARAGRAPHS: boolean indicating whether to split on paragraphs (should always be the opposite of SPLIT_ON_TITLES), - MIN_PARAGRAPH_LENGTH: integer representing the minimum length of a paragraph, - MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text, - INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext, - DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not, - VERBOSE: enable or disable verbose mode} - :return: - """ - - if options[VERBOSE]: - print("Running chatbot parser with options: " + str(options)) - - if options[DEEP_DIRECTORIES] and options[VERBOSE]: - print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") - - # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason - shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS), ignore_errors=True) - shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True) - shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True) - - # make the necessary directories - for directory in [COPIES, PARSED_MDS, IF_MANGLED_FILES]: - directory = os.path.join(options[DESTINATION_DIRECTORY], directory) - if not os.path.exists(directory): - os.makedirs(directory) - - ################### define loop-invariant variables ################### - - # constant that keeps track of the source directory - source_directory = options[SOURCE_DIRECTORY] - - # list of all the filenames - filenames = {} - all_items = os.listdir(source_directory) - files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - for file in files: - filenames[file] = os.path.join(source_directory, file) - - # for loops over all files - for filename in filenames.keys(): - ################### define/reset loop specific variables ################### - - # boolean indicating whether the current file is part of the linux tutorial - is_linux_tutorial = bool(LINUX_TUTORIAL in filenames[filename]) - - # make a copy of the original file in order to make sure the original does not get altered - copy_file = os.path.join(options[DESTINATION_DIRECTORY], COPIES, filename) - shutil.copyfile(filenames[filename], copy_file) - - # variable that keeps track of the directories that are used to write in at different levels - root_dir_generic = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR) - root_dir_os_specific = 
os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR) - root_dir_os_specific_linux = os.path.join(root_dir_os_specific, LINUX) - root_dir_os_specific_windows = os.path.join(root_dir_os_specific, WINDOWS) - root_dir_os_specific_macos = os.path.join(root_dir_os_specific, MACOS) - - # variable for the main title (needed for reference links) - main_title = filename[:-3] - - # variable that keeps track of the directories that are used to write in at different levels - curr_dirs = [filename[:-3] for _ in range(options[MAX_TITLE_DEPTH] + 1)] - - ################### actually parse the md file ################### - - if options[VERBOSE]: - print(LINE + "Processing " + filename) - print("Location: " + filenames[filename]) - print("\nMaking directories:") - - # create directories for the source markdown file - for directory in [root_dir_generic, root_dir_os_specific, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: - if options[VERBOSE]: - print(directory) - os.makedirs(directory, exist_ok=True) - - if options[VERBOSE]: - print("\nParsing the sourcefile with jinja") - - # process the jinja macros - jinja_parser(filename, copy_file, options) - - if options[VERBOSE]: - print("\nSplitting the file for the first time (split in sufficiently small generic sections and large os-specific chunks)") - - # split the text in paragraphs - paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options, is_linux_tutorial) - - if options[VERBOSE]: - print("\nFurther splitting os-specific chunks and writing generic and os-specific sections to files with metadata") - - # for every section, either make the whole section generic, or create an os-specific file for 
each OS - for i, subtitle in enumerate(subtitle_order): - - # generic - if subtitle in paragraphs_os_free_text.keys(): - write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, options, is_linux_tutorial) - - # os-specific - else: - split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata, options, is_linux_tutorial) - - if options[VERBOSE]: - print("\nFinished processing " + filename) - - if options[VERBOSE]: - print(LINE + "Cleaning up directories:") - print(os.path.join(options[DESTINATION_DIRECTORY], COPIES)) - print(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES)) - print(os.path.join(options[DESTINATION_DIRECTORY], LINUX_TUTORIAL)) - # clean up temporary directories and files - shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True) - shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True) - shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], LINUX_TUTORIAL), ignore_errors=True) - if os.path.exists(TEMP_JINJA_FILE): - os.remove(TEMP_JINJA_FILE) - - if options[VERBOSE]: - print("Parsing finished successfully") - - -################### run the script ################### -if __name__ == '__main__': - parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n") - - # adding command-line options - parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located") - parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to") - parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.") - parser.add_argument("-pl", "--min_paragraph_length", type=int, default=512, 
help="Minimum length in characters of a paragraph, only works if split on titles is disabled (default: 683)") - parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)") - parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") - parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is enabled") - parser.add_argument("-v", "--verbose", action="store_true", help="Run the script with verbose output") - - args = parser.parse_args() - - options_dict = {SOURCE_DIRECTORY: args.source, - DESTINATION_DIRECTORY: args.destination, - SPLIT_ON_TITLES: args.split_on_titles, - SPLIT_ON_PARAGRAPHS: not args.split_on_titles, - MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, - MAX_TITLE_DEPTH: args.max_title_depth, - INCLUDE_LINKS_IN_PLAINTEXT: args.links, - DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles, - VERBOSE: args.verbose} - - main(options_dict) diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt deleted file mode 100644 index 37137582aad..00000000000 --- a/scripts/HPC_chatbot_preprocessor/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -PyYAML==6.0.2 -Jinja2==3.1.4 -tiktoken~=0.7.0 -pathlib~=1.0.1 \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt deleted file mode 100644 index 94270ff37e3..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt +++ /dev/null @@ -1,6 +0,0 @@ -Main title -This is the first paragraph of 
text. It is non-os-specific, however it does contain a link. -It also contains some other Markdown syntax and an -example code block. -This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum -character limit for a section). diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json deleted file mode 100644 index 08c0b4e4973..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tps1", - "subtitle": "Main-title", - "source_file": "tests/test_files/ftps/tps1.md", - "title_depth": 1, - "directory": "tps1", - "links": { - "0": "https://docs.hpc.ugent.be/generic" - }, - "parent_title": "", - "previous_title": null, - "next_title": "tps1_paragraph_2", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/tps1/#main-title" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt deleted file mode 100644 index 58eedc06aa0..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt +++ /dev/null @@ -1,3 +0,0 @@ -Conclusion -Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I -might add to this if needed. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json deleted file mode 100644 index 2f1ea4dcd1f..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "tps1", - "subtitle": "Conclusion", - "source_file": "tests/test_files/ftps/tps1.md", - "title_depth": 2, - "directory": "tps1", - "parent_title": "", - "previous_title": "tps1_paragraph_2", - "next_title": null, - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/tps1/#conclusion" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt deleted file mode 100644 index d0ee9ce8256..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt +++ /dev/null @@ -1,4 +0,0 @@ -OS specific sections -This is the second section, it is the start of some -text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will -still add a link. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json deleted file mode 100644 index 208cb3472f4..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tps1", - "subtitle": "OS-specific-sections", - "source_file": "tests/test_files/ftps/tps1.md", - "title_depth": 2, - "directory": "tps1", - "parent_title": "Main-title", - "links": { - "0": "https://docs.hpc.ugent.be/linuxmacos" - }, - "previous_title": "tps1_paragraph_1", - "next_title": "tps1_linux_paragraph_2.2", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#os-specific-sections" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt deleted file mode 100644 index 1a3867e69fa..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt +++ /dev/null @@ -1,3 +0,0 @@ -Non Windows section -Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise -section that ends right here. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json deleted file mode 100644 index b975dfe4e03..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "tps1", - "subtitle": "Non-Windows-section", - "source_file": "tests/test_files/ftps/tps1.md", - "title_depth": 3, - "directory": "tps1", - "parent_title": "OS-specific-sections", - "previous_title": "tps1_linux_paragraph_2.1", - "next_title": "tps1_paragraph_3", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#non-windows-section" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt deleted file mode 100644 index e0642d6ac96..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt +++ /dev/null @@ -1,4 +0,0 @@ -OS specific sections -This is the second section, it is the start of some -text specific to OSes that aren't "windows". I feel like there is no need to make this section very long, however I will -still add a link. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json deleted file mode 100644 index 9c605eb9004..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tps1", - "subtitle": "OS-specific-sections", - "source_file": "tests/test_files/ftps/tps1.md", - "title_depth": 2, - "directory": "tps1", - "parent_title": "Main-title", - "links": { - "0": "https://docs.hpc.ugent.be/linuxmacos" - }, - "previous_title": "tps1_paragraph_1", - "next_title": "tps1_macos_paragraph_2.2", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#os-specific-sections" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt deleted file mode 100644 index 1a3867e69fa..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt +++ /dev/null @@ -1,3 +0,0 @@ -Non Windows section -Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise -section that ends right here. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json deleted file mode 100644 index e3ca81d7cc5..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "tps1", - "subtitle": "Non-Windows-section", - "source_file": "tests/test_files/ftps/tps1.md", - "title_depth": 3, - "directory": "tps1", - "parent_title": "OS-specific-sections", - "previous_title": "tps1_macos_paragraph_2.1", - "next_title": "tps1_paragraph_3", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#non-windows-section" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt deleted file mode 100644 index 9a9cbe1f3d2..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt +++ /dev/null @@ -1,7 +0,0 @@ -OS specific sections -This is the second section, it is the start of some text specific to windows. -In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer -to make sure we get a long section that is over the minimum required length for the next newline character to be -classified as the end of this section. I am doing this because for the next sections I want to test whether they will be -grouped together if they are not long enough to reach the minimum paragraph length on their own. 
Also, before I forget, -let's add a link in this section as well. diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json deleted file mode 100644 index ab58c622b8c..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tps1", - "subtitle": "OS-specific-sections", - "source_file": "tests/test_files/ftps/tps1.md", - "title_depth": 2, - "directory": "tps1", - "parent_title": "Main-title", - "links": { - "0": "https://docs.hpc.ugent.be/windows" - }, - "previous_title": "tps1_paragraph_1", - "next_title": "tps1_windows_paragraph_2.2", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#os-specific-sections" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt deleted file mode 100644 index 6b57235f68f..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt +++ /dev/null @@ -1,6 +0,0 @@ -Windows specific section -Like this. -And this. -And also this. -These section should all be grouped together under the windows specific section of the output. The addition of this long -section at the end should make sure the combination of sections comes to an end here. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json deleted file mode 100644 index 435c9e9c484..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "tps1", - "subtitle": "Windows-specific-section", - "source_file": "tests/test_files/ftps/tps1.md", - "title_depth": 3, - "directory": "tps1", - "parent_title": "OS-specific-sections", - "previous_title": "tps1_windows_paragraph_2.1", - "next_title": "tps1_paragraph_3", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#windows-specific-section" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md deleted file mode 100644 index d9b10d0c524..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md +++ /dev/null @@ -1,43 +0,0 @@ -# Main title - -This is the first paragraph of text. It is non-os-specific, however it does contain [a link](generic.md). -It also contains some `other` *Markdown* _syntax_ and an -```shell -example code block. -``` -This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum -character limit for a section). - -## OS specific sections - -This is the second section, it is the start of some {% if OS == windows %} text specific to windows. 
-In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer -to make sure we get a long section that is over the minimum required length for the next newline character to be -classified as the end of this section. I am doing this because for the next sections I want to test whether they will be -grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, -let's add [a link](windows.md) in this section as well. - -### Windows specific section - -Like this. - -And this. - -And also this. - -These section should all be grouped together under the windows specific section of the output. The addition of this long -section at the end should make sure the combination of sections comes to an end here. -{% else %} -text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will -still add [a link](linuxmacos.md). - -### Non Windows section - -Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise -section that ends right here. -{% endif %} - -## Conclusion - -Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I -might add to this if needed. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt deleted file mode 100644 index f62a4f31fee..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt +++ /dev/null @@ -1,2 +0,0 @@ -blablabla -blablablabla diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json deleted file mode 100644 index b7786c066a7..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "tts1", - "subtitle": "Subtitle-1", - "source_file": "tests/test_files/ftts/tts1.md", - "title_depth": 2, - "directory": "tts1\\Main-title\\Subtitle-1", - "parent_title": "Main-title", - "previous_title": "Main-title", - "next_title": "Subtitle-2-g", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/tts1/#subtitle-1" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt deleted file mode 100644 index bdf68551202..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt +++ /dev/null @@ -1 +0,0 @@ -blablabla \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json deleted file mode 100644 index eb5403804e2..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "tts1", - "subtitle": "Subtitle-5-g", - "source_file": "tests/test_files/ftts/tts1.md", - "title_depth": 2, - "directory": "tts1\\Main-title\\Subtitle-5-g", - "parent_title": "Main-title", - "previous_title": "Subtitle-2-g", - "next_title": null, - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/tts1/#subtitle-5-g" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt deleted file mode 100644 index 48125d91679..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt +++ /dev/null @@ -1,4 +0,0 @@ -blablabla generic -blablabla generic -blablabla Linux macOS -blablablabla Linux macOS with a link diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json deleted file mode 100644 index f7330bec86d..00000000000 --- 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tts1", - "subtitle": "Subtitle-2-g", - "source_file": "tests/test_files/ftts/tts1.md", - "title_depth": 2, - "directory": "tts1\\Main-title\\Subtitle-2-g", - "parent_title": "Main-title", - "links": { - "0": "https://docs.hpc.ugent.be/linuxmacos" - }, - "previous_title": "Subtitle-1", - "next_title": "Subtitle-4-l&m", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/tts1/#subtitle-2-g" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt deleted file mode 100644 index b221f26074b..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt +++ /dev/null @@ -1,3 +0,0 @@ -blablabla Linux macOS -blablablabla Linux macOS -blablabla generic with a link \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json deleted file mode 100644 index a76f852c874..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tts1", - "subtitle": "Subtitle-4-l&m", - "source_file": 
"tests/test_files/ftts/tts1.md", - "title_depth": 3, - "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m", - "parent_title": "Subtitle-2-g", - "links": { - "0": "https://docs.hpc.ugent.be/generic" - }, - "previous_title": "Subtitle-2-g", - "next_title": "Subtitle-5-g", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/tts1/#subtitle-4-lm" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt deleted file mode 100644 index 48125d91679..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt +++ /dev/null @@ -1,4 +0,0 @@ -blablabla generic -blablabla generic -blablabla Linux macOS -blablablabla Linux macOS with a link diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json deleted file mode 100644 index 8b234c92fa6..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tts1", - "subtitle": "Subtitle-2-g", - "source_file": "tests/test_files/ftts/tts1.md", - "title_depth": 2, - "directory": "tts1\\Main-title\\Subtitle-2-g", - "parent_title": "Main-title", - "links": { - "0": "https://docs.hpc.ugent.be/linuxmacos" - }, - "previous_title": "Subtitle-1", - "next_title": "Subtitle-4-l&m", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/tts1/#subtitle-2-g" 
-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt deleted file mode 100644 index b221f26074b..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt +++ /dev/null @@ -1,3 +0,0 @@ -blablabla Linux macOS -blablablabla Linux macOS -blablabla generic with a link \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json deleted file mode 100644 index 732d309da81..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tts1", - "subtitle": "Subtitle-4-l&m", - "source_file": "tests/test_files/ftts/tts1.md", - "title_depth": 3, - "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m", - "parent_title": "Subtitle-2-g", - "links": { - "0": "https://docs.hpc.ugent.be/generic" - }, - "previous_title": "Subtitle-2-g", - "next_title": "Subtitle-5-g", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/tts1/#subtitle-4-lm" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt deleted file mode 100644 index f9f20592832..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt +++ /dev/null @@ -1,4 +0,0 @@ -blablabla generic -blablabla generic -blablabla windows -blablabla windows with a link diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json deleted file mode 100644 index 7a43426a85f..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tts1", - "subtitle": "Subtitle-2-g", - "source_file": "tests/test_files/ftts/tts1.md", - "title_depth": 2, - "directory": "tts1\\Main-title\\Subtitle-2-g", - "parent_title": "Main-title", - "links": { - "0": "https://docs.hpc.ugent.be/windows" - }, - "previous_title": "Subtitle-1", - "next_title": "Subtitle-3-w", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/tts1/#subtitle-2-g" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt deleted file mode 100644 index 0b587cef85a..00000000000 --- 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt +++ /dev/null @@ -1,3 +0,0 @@ -blablabla windows -blablablabla windows -blablabla generic with a link \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json deleted file mode 100644 index 4d7f494320d..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tts1", - "subtitle": "Subtitle-3-w", - "source_file": "tests/test_files/ftts/tts1.md", - "title_depth": 3, - "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-3-w", - "parent_title": "Subtitle-2-g", - "links": { - "0": "https://docs.hpc.ugent.be/generic" - }, - "previous_title": "Subtitle-2-g", - "next_title": "Subtitle-5-g", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/tts1/#subtitle-3-w" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md deleted file mode 100644 index 2f3ad7f9c08..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md +++ /dev/null @@ -1,31 +0,0 @@ -# Main title - -## Subtitle 1 - -blablabla -blablablabla - -## Subtitle 2 g - -blablabla generic -blablabla generic -{% if OS == windows %}blablabla windows -blablabla windows with a [link](windows.md) - -### Subtitle 3 w - -blablabla windows -blablablabla windows -{% else %}blablabla Linux macOS -blablablabla Linux macOS with a 
[link](linuxmacos.md) - -### Subtitle 4 l&m - -blablabla Linux macOS -blablablabla Linux macOS -{% endif %} -blablabla generic with a [link](generic.md) - -## Subtitle 5 g - -blablabla diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md deleted file mode 100644 index 6a74b3c0181..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md +++ /dev/null @@ -1,4 +0,0 @@ -test1: OS_IF -{% if OS == windows %} -test1 -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md deleted file mode 100644 index 2f9cdc38294..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md +++ /dev/null @@ -1,4 +0,0 @@ -test1: OS_IF -{-if-% if OS == windows %-if-} -test1 -{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md deleted file mode 100644 index 360a4a59ba3..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md +++ /dev/null @@ -1,7 +0,0 @@ -test2: OS_IF in NON_OS_IF -{% if site == Gent %} -test2 -{% if OS == windows %} -test2 -{% endif %} -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md deleted file mode 100644 index 798dcf6db24..00000000000 --- 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md +++ /dev/null @@ -1,7 +0,0 @@ -test2: OS_IF in NON_OS_IF -{% if site == Gent %} -test2 -{-if-% if OS == windows %-if-} -test2 -{-if-% endif %-if-} -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md deleted file mode 100644 index d93125a5971..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md +++ /dev/null @@ -1,6 +0,0 @@ -test3: OS_IF with else -{% if OS == linux %} -test3 -{% else %} -test3 -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md deleted file mode 100644 index 02141961338..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md +++ /dev/null @@ -1,6 +0,0 @@ -test3: OS_IF with else -{-if-% if OS == linux %-if-} -test3 -{-if-% else %-if-} -test3 -{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md deleted file mode 100644 index cc15fae1df1..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md +++ /dev/null @@ -1,4 +0,0 @@ -test4: OS_IF with wrong syntax -{ if OS == macos } -test4 -{ endif } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md deleted file mode 100644 index cc15fae1df1..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md +++ /dev/null @@ -1,4 +0,0 @@ -test4: OS_IF with wrong syntax -{ if OS == macos } -test4 -{ endif } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md deleted file mode 100644 index bdb288474e2..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md +++ /dev/null @@ -1,11 +0,0 @@ -test5: OS_IF in OS_IF -{% if OS == windows %} -test5 -{% else %} -{% if OS == linux %} -test5 -{% else %} -test5 -{% endif %} -test5 -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md deleted file mode 100644 index 10443eb67a4..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md +++ /dev/null @@ -1,11 +0,0 @@ -test5: OS_IF in OS_IF -{-if-% if OS == windows %-if-} -test5 -{-if-% else %-if-} -{-if-% if OS == linux %-if-} -test5 -{-if-% else %-if-} -test5 -{-if-% endif %-if-} -test5 -{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md deleted file mode 100644 index 0731ee3588c..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md +++ /dev/null @@ -1,8 +0,0 @@ -test6: NON_OS_IF in OS_IF -{% if OS == macos 
%} -test6 -{% if site == Gent %} -test6 -{% endif %} -test6 -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md deleted file mode 100644 index cd37117cb00..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md +++ /dev/null @@ -1,8 +0,0 @@ -test6: NON_OS_IF in OS_IF -{-if-% if OS == macos %-if-} -test6 -{% if site == Gent %} -test6 -{% endif %} -test6 -{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md deleted file mode 100644 index 6a72a338527..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md +++ /dev/null @@ -1,9 +0,0 @@ -test7: weird spacing and dashes - {%if OS == windows %} - test7 -{%- else%} - test7 - {% if OS == linux%} -test7 - {%-endif %} -{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md deleted file mode 100644 index dfe342ebfb1..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md +++ /dev/null @@ -1,9 +0,0 @@ -test7: weird spacing and dashes - {-if-%if OS == windows %-if-} - test7 -{-if-%- else%-if-} - test7 - {-if-% if OS == linux%-if-} -test7 - {-if-%-endif %-if-} -{-if-%endif%-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md deleted file mode 100644 index fb8c1f8b539..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md +++ /dev/null @@ -1,55 +0,0 @@ -test1: OS_IF -{% if OS == windows %} -test1 -{% endif %} - -test2: OS_IF in NON_OS_IF -{% if site == Gent %} -test2 -{% if OS == windows %} -test2 -{% endif %} -{% endif %} - -test3: OS_IF with else -{% if OS == linux %} -test3 -{% else %} -test3 -{% endif %} - -test4: OS_IF with wrong syntax -{ if OS == macos } -test4 -{ endif } - -test5: OS_IF in OS_IF -{% if OS == windows %} -test5 -{% else %} -{% if OS == linux %} -test5 -{% else %} -test5 -{% endif %} -test5 -{% endif %} - -test6: NON_OS_IF in OS_IF -{% if OS == macos %} -test6 -{% if site == Gent %} -test6 -{% endif %} -test6 -{% endif %} - -test7: weird spacing and dashes - {%if OS == windows %} - test7 -{%- else%} - test7 - {% if OS == linux%} -test7 - {%-endif %} -{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md deleted file mode 100644 index 796e94348fa..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md +++ /dev/null @@ -1,55 +0,0 @@ -test1: OS_IF -{-if-% if OS == windows %-if-} -test1 -{-if-% endif %-if-} - -test2: OS_IF in NON_OS_IF -{% if site == Gent %} -test2 -{-if-% if OS == windows %-if-} -test2 -{-if-% endif %-if-} -{% endif %} - -test3: OS_IF with else -{-if-% if OS == linux %-if-} -test3 -{-if-% else %-if-} -test3 -{-if-% endif %-if-} - -test4: OS_IF with wrong syntax -{ if OS == macos } -test4 -{ endif } - -test5: OS_IF in OS_IF -{-if-% if OS == windows %-if-} -test5 -{-if-% else %-if-} -{-if-% if OS == linux %-if-} -test5 -{-if-% else %-if-} -test5 -{-if-% endif %-if-} 
-test5 -{-if-% endif %-if-} - -test6: NON_OS_IF in OS_IF -{-if-% if OS == macos %-if-} -test6 -{% if site == Gent %} -test6 -{% endif %} -test6 -{-if-% endif %-if-} - -test7: weird spacing and dashes - {-if-%if OS == windows %-if-} - test7 -{-if-%- else%-if-} - test7 - {-if-% if OS == linux%-if-} -test7 - {-if-%-endif %-if-} -{-if-%endif%-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md deleted file mode 100644 index 1e18a1495d5..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md +++ /dev/null @@ -1,15 +0,0 @@ -# Title - -Some explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list. - -1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list. - -2. Second entry - -3. Third entry - - ![image](img/an_image_for_the_third_entry.png) - -4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit. - -And now the text continues like normal in a new section. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py deleted file mode 100644 index 91605dec651..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py +++ /dev/null @@ -1,68 +0,0 @@ -import pytest -import os -import shutil -from chatbot_parser import main - - -@pytest.mark.parametrize("input_directory,actual_output_directory,expected_output_directory, options", [ - ("tests/test_files/ftps", "tests/test_files/ftps/actual", - "tests/test_files/ftps/output", - {"SOURCE_DIRECTORY": "tests/test_files/ftps", - "DESTINATION_DIRECTORY": "tests/test_files/ftps/actual", - "SPLIT_ON_TITLES": False, - "SPLIT_ON_PARAGRAPHS": True, - "MIN_PARAGRAPH_LENGTH": 160, - "MAX_TITLE_DEPTH": 4, - "INCLUDE_LINKS_IN_PLAINTEXT": False, - "DEEP_DIRECTORIES": False, - "VERBOSE": False} - ), - ("tests/test_files/ftts", "tests/test_files/ftts/actual", - "tests/test_files/ftts/output", - {"SOURCE_DIRECTORY": "tests/test_files/ftts", - "DESTINATION_DIRECTORY": "tests/test_files/ftts/actual", - "SPLIT_ON_TITLES": True, - "SPLIT_ON_PARAGRAPHS": False, - "MIN_PARAGRAPH_LENGTH": 160, - "MAX_TITLE_DEPTH": 4, - "INCLUDE_LINKS_IN_PLAINTEXT": False, - "DEEP_DIRECTORIES": True, - "VERBOSE": False} - ) -]) -def test_full_script_generated_directories(input_directory, actual_output_directory, expected_output_directory, options): - # run the script - main(options) - - # Compare directories and files - for dirpath, dirnames, filenames in os.walk(expected_output_directory): - relative_path = os.path.relpath(dirpath, expected_output_directory) - actual_dir = os.path.join(actual_output_directory, relative_path) - - # Check if the directory exists - assert os.path.isdir(actual_dir), f"Directory '{actual_dir}' is missing." 
- - # Check for files - for filename in filenames: - ref_file = os.path.join(dirpath, filename) - gen_file = os.path.join(actual_dir, filename) - - # Check if the file exists - assert os.path.isfile(gen_file), f"File '{gen_file}' is missing." - - # Check file content - with open(ref_file, 'r') as ref_f, open(gen_file, 'r') as gen_f: - ref_content = ref_f.read().strip() - gen_content = gen_f.read().strip() - assert ref_content == gen_content, f"Content of file '{gen_file}' does not match." - - # check that not too many directories have been generated - for dirpath, dirnames, filenames in os.walk(actual_output_directory): - relative_path = os.path.relpath(dirpath, actual_output_directory) - expected_dir = os.path.join(expected_output_directory, relative_path) - - # Check if the directory exists - assert os.path.isdir(expected_dir), f"Directory '{relative_path}' was made, but shouldn't have been." - - # remove directory - shutil.rmtree(actual_output_directory, ignore_errors=True) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py deleted file mode 100644 index 4d0dd876103..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py +++ /dev/null @@ -1,32 +0,0 @@ -import pytest -import os -import shutil -from chatbot_parser import mangle_ifs - - -@pytest.mark.parametrize("input_file,output_file", [ - ("if_mangler_1_input.md", "if_mangler_1_output.md"), - ("if_mangler_2_input.md", "if_mangler_2_output.md"), - ("if_mangler_3_input.md", "if_mangler_3_output.md"), - ("if_mangler_4_input.md", "if_mangler_4_output.md"), - ("if_mangler_5_input.md", "if_mangler_5_output.md"), - ("if_mangler_6_input.md", "if_mangler_6_output.md"), - ("if_mangler_7_input.md", "if_mangler_7_output.md") -]) -def test_if_mangler(input_file, output_file): - # make directory - os.makedirs(os.path.join("if_mangled_files"), exist_ok=True) - - # make filepaths - input_file_path = os.path.join("tests", 
"test_files", "if_mangler_test_files", input_file) - expected_output_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", output_file) - actual_output_file_path = os.path.join("if_mangled_files", input_file) - mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.'}) - - # check every line - with open(expected_output_file_path, "r") as expected_read_file: - with open(actual_output_file_path, "r") as actual_read_file: - assert all([expected_line == actual_line for expected_line, actual_line in zip(expected_read_file, actual_read_file)]) - - # remove directory - shutil.rmtree("if_mangled_files", ignore_errors=True) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py b/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py deleted file mode 100644 index 9109f2518ad..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py +++ /dev/null @@ -1,31 +0,0 @@ -import pytest -from chatbot_parser import insert_links - -options_include = {"INCLUDE_LINKS_IN_PLAINTEXT": True} -options_leave_out = {"INCLUDE_LINKS_IN_PLAINTEXT": False} -links_input = {"0": "https://first_link.com", "1": "https://second_link.be", "2": "https://docs.hpc.ugent.be/account#welcome-e-mail", "3": "https://final-link.org"} - - -@pytest.mark.parametrize("text_input, options_input, text_output, new_links", [ - # Text without links - # don't include links - ("Text without links\nand with two lines.", options_leave_out, "Text without links\nand with two lines.", {}), - # include links - ("Text without links\nand with two lines.", options_include, "Text without links\nand with two lines.", {}), - # Text with all links - # don't include links - ("Text with all the links\nand with multiple lines.\nĀ§linkĀ§linkĀ§0Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§2Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§3Ā§linkĀ§linkĀ§", options_leave_out, - "Text with all the links\nand with multiple lines.\n\n\n\n", links_input), - # include links - 
("Text with all the links\nand with multiple lines.\nĀ§linkĀ§linkĀ§0Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§2Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§3Ā§linkĀ§linkĀ§", options_include, - "Text with all the links\nand with multiple lines.\n https://first_link.com \n https://second_link.be \n https://docs.hpc.ugent.be/account#welcome-e-mail \n https://final-link.org ", links_input), - # Text with some links - # don't include links - ("Text with all the links\nand with multiple lines.\nĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§3Ā§linkĀ§linkĀ§", options_leave_out, - "Text with all the links\nand with multiple lines.\n\n", {"0": "https://second_link.be", "1": "https://final-link.org"}), - # include links - ("Text with all the links\nand with multiple lines.\nĀ§linkĀ§linkĀ§0Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§2Ā§linkĀ§linkĀ§", options_include, - "Text with all the links\nand with multiple lines.\n https://first_link.com \n https://docs.hpc.ugent.be/account#welcome-e-mail ", {"0": "https://first_link.com", "1": "https://docs.hpc.ugent.be/account#welcome-e-mail"}) -]) -def test_insert_links(text_input, options_input, text_output, new_links): - assert insert_links(text_input, links_input, options_input) == (text_output, new_links) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_links.py b/scripts/HPC_chatbot_preprocessor/tests/test_links.py deleted file mode 100644 index d1acca1d740..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_links.py +++ /dev/null @@ -1,69 +0,0 @@ -import os -import pytest -from urllib import request -from chatbot_parser import main -import json - -whitelist = ["mailto:hpc@ugent.be"] -slow_list = ["https://login.hpc.ugent.be", "https://www.edx.org/course/introduction-linux-linuxfoundationx-lfs101x-0"] - -options_general = {"SOURCE_DIRECTORY": "../../mkdocs/docs/HPC", - "DESTINATION_DIRECTORY": ".", - "SPLIT_ON_TITLES": False, - "SPLIT_ON_PARAGRAPHS": True, - "MIN_PARAGRAPH_LENGTH": 683, - "MAX_TITLE_DEPTH": 4, - 
"INCLUDE_LINKS_IN_PLAINTEXT": False, - "DEEP_DIRECTORIES": False, - "VERBOSE": False} -options_os_specific = {"SOURCE_DIRECTORY": "../../mkdocs/docs/HPC/linux-tutorial", - "DESTINATION_DIRECTORY": "./linux-tutorial", - "SPLIT_ON_TITLES": False, - "SPLIT_ON_PARAGRAPHS": True, - "MIN_PARAGRAPH_LENGTH": 683, - "MAX_TITLE_DEPTH": 4, - "INCLUDE_LINKS_IN_PLAINTEXT": False, - "DEEP_DIRECTORIES": False, - "VERBOSE": False} - - -@pytest.mark.parametrize("options", [options_general, options_os_specific]) -def test_all_links(options): - all_links = {} - main(options) - broken_links = {} - empty_links = {} - - for (dirpath, dirnames, filenames) in os.walk(os.path.join(options['DESTINATION_DIRECTORY'], 'parsed_mds')): - for filename in filenames: - all_links[filename] = [] - if filename.endswith('metadata.json'): - data = json.load(open(os.path.join(dirpath, filename))) - if 'links' in data.keys(): - for key in data['links'].keys(): - all_links[filename].append(data['links'][key]) - all_links[filename].append(data['reference_link'].split("#")[0]) - - for filename in all_links.keys(): - all_links[filename] = list(set(all_links[filename])) - for link in all_links[filename]: - if len(link) != 0: - try: - if link not in whitelist and link not in slow_list: - with request.urlopen(link) as res: - if res.status == 200: - pass - except: - print("Broken link in " + filename + ": " + link) - if filename in broken_links.keys(): - broken_links[filename].append(link) - else: - broken_links[filename] = [link] - else: - print("Empty link in " + filename) - if filename in empty_links.keys(): - empty_links[filename].append(link) - else: - empty_links[filename] = [link] - assert len(empty_links.keys()) == 0 - assert len(broken_links.keys()) == 0 diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py deleted file mode 100644 index 06e56a5cb2c..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py +++ /dev/null @@ 
-1,27 +0,0 @@ -import pytest -from chatbot_parser import split_on_paragraphs - - -@pytest.mark.parametrize("file, main_title, options, is_linux_tutorial, expected_text", [ - ("./test_files/list_file/list_test.md", - "list_test.md", - { - "SOURCE_DIRECTORY": "./test_files/list_file", - "DESTINATION_DIRECTORY": "./test_files/list_file", - "SPLIT_ON_TITLES": False, - "SPLIT_ON_PARAGRAPHS": True, - "MIN_PARAGRAPH_LENGTH": 100, - "MAX_TITLE_DEPTH": 4, - "INCLUDE_LINKS_IN_PLAINTEXT": False, - "DEEP_DIRECTORIES": False, - "VERBOSE": False - }, - False, - { - 'list_test.md_paragraph_1': "Title\nSome explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.\n1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list.\n2. Second entry\n3. Third entry\n4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.\n", - 'list_test.md_paragraph_2': 'And now the text continues like normal in a new section.' 
- } - ) -]) -def test_links(file, main_title, options, is_linux_tutorial, expected_text): - assert split_on_paragraphs(file, main_title, options, is_linux_tutorial)[1] == expected_text diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py deleted file mode 100644 index 225c368477d..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py +++ /dev/null @@ -1,14 +0,0 @@ -import pytest -from chatbot_parser import make_valid_title - - -@pytest.mark.parametrize("input_string,expected", [ - ("", ""), - ("A-good-filename-with-dashes", "A-good-filename-with-dashes"), - (" A very good filename beginning and ending in a space ", "A-very-good-filename-beginning-and-ending-in-a-space"), - ("-A-very-good-filename-beginning-and-ending-in-a-dash-", "A-very-good-filename-beginning-and-ending-in-a-dash"), - ("A filename containing bad characters <>:\"/\\|?*\0", "A-filename-containing-bad-characters"), - ("A filename ending with {some jinja garbage}", "A-filename-ending-with") -]) -def test_make_valid_title(input_string, expected): - assert make_valid_title(input_string) == expected diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py deleted file mode 100644 index f4cee6dd75c..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py +++ /dev/null @@ -1,46 +0,0 @@ -import pytest -from chatbot_parser import replace_markdown_markers - - -@pytest.mark.parametrize("input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist", [ - # baseline test - ("A normal line with nothing special", [], False, "", "A normal line with nothing special", []), - # image 1 - ("![image](a-nice-image.png)", [], False, "", "", []), - # image 2 - ("![](img/Look-at-this-photograph.png)", [], False, "", "", []), - # link 1 (outside 
docs) - ("A line with a [link](a-nice-link.com)", ["another-link.be"], False, "", - "A line with a linkĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["another-link.be", "a-nice-link.com"]), - # link 2 (another document within the docs) - ("A line with a [link to the docs](account.md#welcome-e-mail)", ["another-link.be"], False, "", - "A line with a link to the docsĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]), - # link 3 (the same document) - ("A line with a [link to the same doc](#welcome-e-mail)", ["another-link.be"], False, "account.md", - "A line with a link to the same docĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]), - # codeblock - ("```shell", [], True, "", "", []), - # html syntax 1 (normal syntax) - ("A line with something in Bold", [], False, "", "A line with something in Bold", []), - # html syntax 2 (link) - ("A line with another link
", ["other-website.com"], False, "", - "A line with another linkĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["other-website.com", "website.com"]), - # html syntax 3 (style) - ("

A line with style

", [], False, "", "A line with style", []), - # Bot comment - ("", [], False, "", "Something about the following table", []), - # non-Bot comment - ("", [], False, "", "", []), - # something else with <> - ("A line with an example where you should put ", [], False, "", "A line with an example where you should put ", []), - # info/tips/warnings - ("!!! warning", [], False, "", " warning", []), - # collapsable admonitions - ("??? note", [], False, "", " note", []), - # Markdown syntax 1 (not in code block) - ("`Line` **with** ++a++ _lot_ *of* _++markdown++_ `syntax`", [], False, "", "Line with a lot of markdown syntax", []), - # Markdown syntax 2 (in code block) - ("`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", [], True, "", "`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", []) -]) -def test_replace_markdown_markers(input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist): - assert replace_markdown_markers(input_line, input_linklist, in_code_block, main_title) == (expected_line, expected_linklist) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py deleted file mode 100644 index 6c30fef7985..00000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py +++ /dev/null @@ -1,15 +0,0 @@ -import pytest -import os -from chatbot_parser import write_metadata - - -@pytest.mark.parametrize("main_title,subtitle,links,title_level,directory,source_file,output", [ - ("", "", [], 1, "", "", {"source_file": "", "main_title": "", "subtitle": "", "title_depth": 1, "directory": "", "parent_title": ""}), - ("A_very_good_main_title", "An_extremely_good_subtitle", ["the_first.link", "the_second.link"], 2, - os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), "source", - {"source_file": "source", "main_title": "A_very_good_main_title", "subtitle": "An_extremely_good_subtitle", 
"title_depth": 2, - "directory": os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), - "parent_title": "An_awesome_parent_file", "links": {"0": "the_first.link", "1": "the_second.link"}}) -]) -def test_write_metadata(main_title, subtitle, links, title_level, directory, source_file, output): - assert write_metadata(main_title, subtitle, links, title_level, directory, source_file) == output From 445f7eec653638100120addeae1c25114e69022c Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 14:48:58 +0200 Subject: [PATCH 145/152] Revert "removing unnecessary files" This reverts commit 32b8b741c8582a98b122b230742e1be09ba8c698. --- scripts/HPC_chatbot_preprocessor/README.md | 196 +++ .../chatbot_parser.py | 1236 +++++++++++++++++ .../HPC_chatbot_preprocessor/requirements.txt | 4 + .../generic/tps1/tps1_paragraph_1.txt | 6 + .../tps1/tps1_paragraph_1_metadata.json | 15 + .../generic/tps1/tps1_paragraph_3.txt | 3 + .../tps1/tps1_paragraph_3_metadata.json | 12 + .../linux/tps1/tps1_linux_paragraph_2.1.txt | 4 + .../tps1_linux_paragraph_2.1_metadata.json | 15 + .../linux/tps1/tps1_linux_paragraph_2.2.txt | 3 + .../tps1_linux_paragraph_2.2_metadata.json | 12 + .../macos/tps1/tps1_macos_paragraph_2.1.txt | 4 + .../tps1_macos_paragraph_2.1_metadata.json | 15 + .../macos/tps1/tps1_macos_paragraph_2.2.txt | 3 + .../tps1_macos_paragraph_2.2_metadata.json | 12 + .../tps1/tps1_windows_paragraph_2.1.txt | 7 + .../tps1_windows_paragraph_2.1_metadata.json | 15 + .../tps1/tps1_windows_paragraph_2.2.txt | 6 + .../tps1_windows_paragraph_2.2_metadata.json | 12 + .../tests/test_files/ftps/tps1.md | 43 + .../tts1/Main-title/Subtitle-1/Subtitle-1.txt | 2 + .../Subtitle-1/Subtitle-1_metadata.json | 12 + .../Main-title/Subtitle-5-g/Subtitle-5-g.txt | 1 + .../Subtitle-5-g/Subtitle-5-g_metadata.json | 12 + .../Main-title/Subtitle-2-g/Subtitle-2-g.txt | 4 + .../Subtitle-2-g/Subtitle-2-g_metadata.json | 15 + .../Subtitle-4-l&m/Subtitle-4-l&m.txt | 
3 + .../Subtitle-4-l&m_metadata.json | 15 + .../Main-title/Subtitle-2-g/Subtitle-2-g.txt | 4 + .../Subtitle-2-g/Subtitle-2-g_metadata.json | 15 + .../Subtitle-4-l&m/Subtitle-4-l&m.txt | 3 + .../Subtitle-4-l&m_metadata.json | 15 + .../Main-title/Subtitle-2-g/Subtitle-2-g.txt | 4 + .../Subtitle-2-g/Subtitle-2-g_metadata.json | 15 + .../Subtitle-3-w/Subtitle-3-w.txt | 3 + .../Subtitle-3-w/Subtitle-3-w_metadata.json | 15 + .../tests/test_files/ftts/tts1.md | 31 + .../if_mangler_1_input.md | 4 + .../if_mangler_1_output.md | 4 + .../if_mangler_2_input.md | 7 + .../if_mangler_2_output.md | 7 + .../if_mangler_3_input.md | 6 + .../if_mangler_3_output.md | 6 + .../if_mangler_4_input.md | 4 + .../if_mangler_4_output.md | 4 + .../if_mangler_5_input.md | 11 + .../if_mangler_5_output.md | 11 + .../if_mangler_6_input.md | 8 + .../if_mangler_6_output.md | 8 + .../if_mangler_7_input.md | 9 + .../if_mangler_7_output.md | 9 + .../if_mangler_test_files/if_mangler_input.md | 55 + .../if_mangler_output.md | 55 + .../tests/test_files/list_file/list_test.md | 15 + .../tests/test_full_script.py | 68 + .../tests/test_if_mangler.py | 32 + .../tests/test_insert_links.py | 31 + .../tests/test_links.py | 69 + .../tests/test_lists.py | 27 + .../tests/test_make_valid_title.py | 14 + .../tests/test_replace_markdown_markers.py | 46 + .../tests/test_write_metadata.py | 15 + 62 files changed, 2317 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/README.md create mode 100644 scripts/HPC_chatbot_preprocessor/chatbot_parser.py create mode 100644 scripts/HPC_chatbot_preprocessor/requirements.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt create mode 
100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json create mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md create mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_full_script.py create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_links.py create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_lists.py create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md new file mode 100644 index 00000000000..6cfd9be8231 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -0,0 +1,196 @@ +# Chatbot parser + +`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory as input for a chatbot. 
+ +## Usage + +The script can be run in a shell environment with the following command: + +```shell +python chatbot_parser.py +``` + +This command has the following possible options: + +```shell +chatbot_parser.py [-h] -src SOURCE -dst DESTINATION [-st] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd] +``` + +### Options + +#### `h`/`help` + +Display the help message + +#### `src`/`source` + +This is a required option that specifies the source directory of the input files for the script. This location is also used to look for jinja templates when using jinja to parse the source files (such as the `macros` directory within `vsc_user_docs/mkdocs/docs/HPC`). + +#### `dst`/`destination` + +This is a required option that specifies where the output of the script should be written. The script also generates extra intermediate subdirectories, so subdirectories with the following names shouldn't be present in the destination directory: `parsed_mds`, `copies` and `if_mangled_files`. If any of these pose a problem, the name of the intermediate subdirectory used for the script can be changed in the macros at the top of the script. + +#### `st`/`split_on_titles` + +Including this option will split the source files based on the titles and subtitles in the markdown text. Not including this option will split the text on paragraphs with a certain minimum length. + +#### `pl`/`min_paragraph_length` + +This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 512 tokens. This option only works if `split_on_titles` is not enabled. + +#### `td`/`max_title_depth` + +This option allows the user to configure the maximum "title depth" (the amount of `#` in front) to be used as borders between sections if `split_on_titles` is enabled. The default value is 4. 
+ +#### `l`/`links` + +Some of the sourcefiles might contain links. Including this option will retain the links in the plaintext. If this option is not included, the links will be dropped from the plaintext. + +#### `dd`/`deep_directories` + +Including this option will make the script generate a "deep directory" where every title encountered will be made into a subdirectory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). This option only works if `split_on_titles` is enabled. + +## Generated file structure + +The generated directory structure is written as a subdirectory of `parsed_mds`. In `parsed_mds`, two subdirectories can be found: + +- `generic` contains the parts of the markdown sources that were non-OS-specific +- `os_specific` contains the parts of the markdown sources that were OS-specific + +Within `os_specific` a further distinction is made for each of the three possible operating systems included in the documentation. + +Both the generic and each of the three os-specific directories then contain a directory for each source file. + +If the option `deep_directories` is not enabled, all paragraphs of the source file and their corresponding metadata will be saved in this directory. The (processed) plaintext of the paragraph is written to a `.txt` file and the metadata is written to a `.json` file. + +If the option `deep_directories` is enabled, the directory of each source file will contain a subdirectory structure corresponding to the structure of the subtitles at different levels in the source file. Each subtitle in the source file corresponds to a directory nested in the directory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). 
+ +Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and a `.json` file with the metadata of that section. + +## Requirements + +- The required Python packages are listed in `requirements.txt` + +## Restrictions on source-files + +Due to the nature of the script, some restrictions should be taken into account about the markdown files it can use as input. + +### Nested if structures + +The script uses the if-structures in the source-files to split the documentation into general documentation and os-specific documentation. As such it needs to keep track of which types of if-structures (os-related/non-os-related) it is reading from. When using certain nested if-structures, this will cause problems. The supported nested if-structures are determined by the macros `NON_OS_IF`, `NON_OS_IF_IN_OS_IF`, `OS_IF` and `OS_IF_IN_OS_IF`. So respectively a non-os-related if-structure, a non-os-related if nested in an os-related one, an os-related if-structure and an os-related if-structure nested in another os-related if-structure. All of these are allowed to be nested in an undetermined amount of non-os-related if-structures, but no non-os-related if structures should be nested in them. It is also not allowed to nest any of the allowed structures in more os-related if-structures. + +#### Examples of valid and invalid if-structures + +##### Allowed + +###### non-os-related in os-related + +This is an example of one of the basic allowed if-structures (`NON_OS_IF_IN_OS_IF`) + +``` +if OS == windows: + if site == Gent: + ... + endif +endif +``` + +###### os-related in os-related in non-os-related + +This is an example of the basic allowed if-structure `OS_IF_IN_OS_IF` nested in a non-os-specific if. + +``` +if site == Gent: + if OS == windows: + ... + else: + if OS == Linux: + ... 
+ endif + endif +endif +``` + +##### Not allowed + +###### non-os-related in os-related in os-related + +This is an example of a non-os-related if-structure nested in one of the basic allowed if-structures (`OS_IF_IN_OS_IF`). + +``` +if OS != windows: + if OS == Linux: + if site == Gent: + ... + endif + endif +endif +``` + +This will result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it. + +###### os-related in non-os-related in os-related + +This is an example of the basic allowed if-structure `OS_IF` (indirectly) nested in an os-specific if-structure. + +``` +if OS != windows: + if site == Gent: + if OS == Linux: + ... + endif + endif +endif +``` + +This will also result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it. + +### Non OS-related if-statements + +Due to the way jinja parses the sourcefiles, the script slightly alters non os-specific if-statements as well. It expects if-statements of the following form: + +``` +{%- if site == gent %} +{% if site != (gent or brussel) %} +``` + +All spaces and the dash are optional. City names don't need to be fully lowercase since the parser will capitalize them properly anyway. + +### html syntax + +The input shouldn't contain any html syntax. While some failsafes are in place, the script isn't made with the use case of handling html syntax in mind. + +### Comments + +Any comments within the markdown files (for example TODO's) should follow the following syntax: + +``` +<!-- your comment --> +``` + and should be limited to one line. + +Comments can be written in such a way that the script will keep them as input for the bot. To do that, the marker `INPUT_FOR_BOT` should be put in front of the content of the comment as such. + +``` +<!-- INPUT_FOR_BOT: your comment for the bot --> +``` + +This will be reworked to + +``` +your comment for the bot +``` + +in the final output. 
+ +### Long filepaths + +Due to the nature of this script, it can generate large directories with very long names if `deep_directories` is enabled. Depending on the operating system, this can cause problems with filepaths being too long, resulting in files that cannot be opened. A possible fix for this is to make sure the filepath to where the script is located is not too long. Another solution is lowering the `max_title_depth` or disabling `deep_directories`. + +### Markdown lists + +The parser is made in a way to detect lists and not split them into multiple paragraphs. The kinds of lists it can detect are all lists with the markers `-`, `+`, `*` and lists indexed with numbers or letters (one letter per list entry). It can handle list entries being spread out over multiple lines if there is an indentation of at least two spaces. It can also handle multiple paragraph list entries in this way, as long as the indentation stays. + +### Links + +Part of the metadata of the parser are links. In order for the links to be built up in the right way, links to external sites should always start with either `https://` or `http://`. 
diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py new file mode 100644 index 00000000000..24e0b287a0a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -0,0 +1,1236 @@ +#!/usr/bin/env python3 + +import argparse +import copy +import json +import os +import re +import shutil +import tiktoken +import yaml +from itertools import chain, tee, zip_longest +from pathlib import Path +from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template + +#################### define macro's #################### +# options +SOURCE_DIRECTORY = "SOURCE_DIRECTORY" +DESTINATION_DIRECTORY = "DESTINATION_DIRECTORY" +SPLIT_ON_TITLES = "SPLIT_ON_TITLES" +MIN_PARAGRAPH_LENGTH = "MIN_PARAGRAPH_LENGTH" +MAX_TITLE_DEPTH = "MAX_TITLE_DEPTH" +INCLUDE_LINKS_IN_PLAINTEXT = "INCLUDE_LINKS_IN_PLAINTEXT" +SPLIT_ON_PARAGRAPHS = "SPLIT_ON_PARAGRAPHS" +DEEP_DIRECTORIES = "DEEP_DIRECTORIES" +VERBOSE = "VERBOSE" + +# directories +PARSED_MDS = "parsed_mds" +COPIES = "copies" +IF_MANGLED_FILES = "if_mangled_files" +LINUX_TUTORIAL = "linux-tutorial" +RETURN_DIR = ".." 
+MKDOCS_DIR = "mkdocs" +DOCS_DIR = "docs" +HPC_DIR = "HPC" +EXTRA_DIR = "extra" +GENERIC_DIR = "generic" +OS_SPECIFIC_DIR = "os_specific" +MACROS = "macros" + +# OSes +LINUX = "linux" +WINDOWS = "windows" +MACOS = "macos" +GENERIC = "generic" +LINK_OS = {LINUX: "Linux", WINDOWS: "Windows", MACOS: "macOS"} # OS needs different capitalisation for use in links + +# urls +REPO_URL = 'https://github.com/hpcugent/vsc_user_docs' +DOCS_URL = "https://docs.hpc.ugent.be" + +# OS-related if-states +ACTIVE = "active" +INACTIVE = "inactive" + +# if mangler states +NON_OS_IF = 0 +NON_OS_IF_IN_OS_IF = 1 +OS_IF = 2 +OS_IF_IN_OS_IF = 3 + +# if mangler macros +IF_MANGLED_PART = "-if-" + +# actions +DONE = "done" +WRITE_TEXT = "write_text" +CHECK_EXTRA_MESSAGE = "check_extra_message" +WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE = "write_text_and_check_extra_message" + +# Metadata attributes +SOURCE_FILE = "source_file" +MAIN_TITLE = "main_title" +SUBTITLE = "subtitle" +TITLE_DEPTH = "title_depth" +DIRECTORY = "directory" +LINKS = "links" +PARENT_TITLE = "parent_title" +PREVIOUS_SUBTITLE = "previous_title" +NEXT_SUBTITLE = "next_title" +METADATA_OS = "OS" +REFERENCE_LINK = "reference_link" + +# if-structure components +IF = "if" +ELSE = "else" +ENDIF = "endif" + +# link indicator +LINK_MARKER = r'Ā§linkĀ§linkĀ§' + +# HTML tags +HTML_TAGS = ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"] # make sure these are always lowercase + +# regex patterns +IF_MANGLED_PATTERNS = { + IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})', + ELSE: r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})', + ENDIF: r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})' + } + +# filenames (and parts of filenames) +TEMP_JINJA_FILE = "jinja_file.txt" +_PARAGRAPH_ = "_paragraph_" +METADATA_EXTENSION = "_metadata" + +# Marker for comments for the bot +INPUT_FOR_BOT = "INPUT_FOR_BOT: " + 
def check_for_title(line, in_code_block, curr_dirs, options):
    """
    Report the heading depth of a line and keep the per-level directories in sync.

    A line counts as a title when it starts with one or more '#' followed by a space,
    is not deeper than options[MAX_TITLE_DEPTH] and is not inside a code block.

    :param line: the line to inspect (the final character is treated as the line terminator)
    :param in_code_block: truthy when the line is inside a code block, so '#' comments are not taken for titles
    :param curr_dirs: list of working directories per title level, updated in place when deep directories are enabled
    :param options: dictionary containing the options given by the user
    :return title_length: number of '#' characters in front of the title, or 0 when the line is not a title
    """
    heading = re.match(r'^#+ ', line)

    # not a heading, too deep a heading, or a '#' inside a code block
    if not heading or len(heading.group(0)) > options[MAX_TITLE_DEPTH] + 1 or in_code_block:
        return 0

    # the match includes the trailing space, so subtract it to get the number of '#'
    depth = len(heading.group(0)) - 1

    if options[DEEP_DIRECTORIES]:
        # directory of this title lives below the directory of the level above it
        parent_dir = curr_dirs[depth - 1]
        dir_name = make_valid_title(line[depth + 1:-1].replace(' ', '-'))
        curr_dirs[depth] = os.path.join(parent_dir, dir_name)

        # deeper levels start out in the directory of the title that was just found
        # NOTE(review): original indentation was lost in this view; confirm this loop belongs under the DEEP_DIRECTORIES check
        for deeper_level in range(depth + 1, options[MAX_TITLE_DEPTH] + 1):
            curr_dirs[deeper_level] = curr_dirs[depth]

    return depth
"index", "manipulating_files_and_directories", "navigating", "uploading_files"] + if is_linux_tutorial and any([linux_tutorial_files[i] in link for i in range(len(linux_tutorial_files))]): + linux_part = LINUX_TUTORIAL + '/' + else: + linux_part = "" + + if link.startswith('http://') or link.startswith('https://') or link.startswith('mailto:'): + pass + else: + if link.startswith("./"): + link = link.replace('./', '') + elif link.startswith("../"): + link = link.replace('../', '') + + if link.startswith("#"): + link = DOCS_URL + '/' + linux_part + main_title + "/" + link + elif link.endswith(".md") and ("/" not in link or "." not in link.split("/")[0]): + link = DOCS_URL + '/' + linux_part + link.replace(".md", "") + elif '.md#' in link: + link = DOCS_URL + '/' + linux_part + link.replace(".md", "/") + else: + link = DOCS_URL + '/' + linux_part + link + + link = link.replace('index/', '').replace('/index', '') + + return link + + +def replace_markdown_markers(curr_line, linklist, in_code_block, main_title, is_linux_tutorial): + """ + function that replaces certain markdown structures with the equivalent used on the website + + :param curr_line: the current line on which markdown structures need to be replaced + :param linklist: the list used to store links that need to be printed at the end of the file + :param in_code_block: boolean indicating whether the current line is part of a code block + :param main_title: the main title of the file that is being processed + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial + :return curr_line: the adapted current line + :return linklist: the updated linklist + """ + + # replace images with an empty line + if re.search(r'(?i)!\[image]\(.*?\)', curr_line) or re.search(r'!\[.*?]\(img/.*?\.png\)', curr_line): + curr_line = "" + + # replace links with a reference + matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) + if matches: + for match in matches: + curr_line = 
curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_MARKER + str(len(linklist)) + LINK_MARKER) + + linklist.append(make_valid_link(match[1], main_title, is_linux_tutorial)) + + # codeblock (with ``` -> always stands on a separate line, so line can be dropped) + if '```' in curr_line: + curr_line = "" + + # structures within <> + match = re.findall(r'<(.*?)>', curr_line) + if match: + for i, content in enumerate(match): + html_tags_variations = list(chain.from_iterable([[element, element + "/", "/" + element] for element in HTML_TAGS])) + html_tags_style = [element + " style=.*" for element in HTML_TAGS] + + # add references for every link of format
+ if re.search(r'a href=.*', content): + link = content[7:] + curr_line = re.sub(f'<{content}>', LINK_MARKER + str(len(linklist)) + LINK_MARKER, curr_line) + linklist.append(link) + + # drop the syntax words + elif content.lower() in html_tags_variations: + curr_line = re.sub(f'<{content}>', "", curr_line) + + # drop the version of the HTML_TAGS followed by " style=" + elif any(re.match(pattern, content) for pattern in html_tags_style): + curr_line = re.sub(r'<.*?>', "", curr_line) + + # keep comments for bot + elif re.fullmatch(r'!--' + INPUT_FOR_BOT + r'.*?--', content): + curr_line = re.sub(r'', lambda m: m.group(1), curr_line) + + # drop comments + elif re.fullmatch(r'!--.*?--', content): + curr_line = re.sub(r'<.*?>', "", curr_line) + + # drop the <> around links + elif re.match(r'http://', content) or re.match(r'https://', content): + curr_line = re.sub(r'<' + content + '>', content, curr_line ) + + # keep the rest + else: + pass + + # structures with !!! (info, tips, warnings) + if '!!!' in curr_line: + curr_line = re.sub(r'!!!', "", curr_line) + + # structures with ??? (collapsable admonitions) + if '???' in curr_line: + curr_line = re.sub(r'\?\?\?', "", curr_line) + + # get rid of other indicators (`, *, +, _) + if not in_code_block: + + backquotes = re.findall(r'`(.*?)`', curr_line) + if backquotes: + for i, content in enumerate(backquotes): + curr_line = curr_line.replace(f"`{content}`", content) + + asterisks = re.findall(r'(?' 
in line) ^ ('' in line)): + in_code_block = not in_code_block + if options[VERBOSE]: + if in_code_block: + print("Detected start of a codeblock, not registering titles") + else: + print("Detected end of codeblock, registering titles again") + + # only split up if current line is in a fully non-os-specific section + if in_if_statement == 0: + + title_level = check_for_title(line, in_code_block, curr_dirs, options) + + # line is a title with a maximum depth of 4 + if title_level > 0: + if after_first_title: + + # write text of previous file + if previous_contained_if: + paragraphs_os_text[title] = current_paragraph + if options[VERBOSE]: + print("Saved os-specific chunk with temporary title: " + title + "\n") + else: + paragraphs_os_free_text[title] = current_paragraph + if options[VERBOSE]: + print("Saved generic chunk with title: " + title + "\n") + + # write metadata of previous file + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir, options[SOURCE_DIRECTORY] + '/' + main_title + '.md') + + # make a new title + title = make_valid_title(line[title_level + 1:-1]) + + # create an entry for the file in the paragraphs text dictionary + current_paragraph = "" + + after_first_title = True + subtitle_order.append(title) + + # reset link_list + link_list = [] + + previous_contained_if = False + + # line is not a title + elif after_first_title: + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) + if line != "\n": + current_paragraph += line + + # keep track of title level and directory to write to metadata upon discovering a new subtitle + if title_level > 0: + last_title_level = title_level + last_dir = curr_dirs[last_title_level] + else: + previous_contained_if = True + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) + if line != "\n": + current_paragraph += line + + # write dictionaries for the last file + 
if previous_contained_if: + paragraphs_os_text[title] = current_paragraph + if options[VERBOSE]: + print("Saved os-specific chunk with temporary title: " + title + "\n") + else: + paragraphs_os_free_text[title] = current_paragraph + if options[VERBOSE]: + print("Saved generic chunk with title: " + title + "\n") + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, curr_dirs[last_title_level], options[SOURCE_DIRECTORY] + '/' + main_title + '.md') + + return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order + + +def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_paragraph_number=-1, OS=GENERIC): + """ + Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata + + :param file: the filepath of the file to be split + :param main_title: the main title of the file + :param options: dictionary containing the options given by the user + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial + :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph + :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph + :return paragraphs_text: dictionary containing the split sections of text + :return paragraphs_metadata: dictionary containing the metadata of each split section of text + :return subtitle_order: list containing all encountered subtitles in order of appearance + """ + + if options[VERBOSE]: + print("Splitting on paragraphs\n") + + # start of assuming we are not in a code_block + in_code_block = False + + # define initial dictionaries + paragraphs_os_free_text = {} + paragraphs_os_text = {} + paragraphs_metadata = {} + + # variable to keep track of the current paragraph + current_paragraph = "" + + # list to keep track of 
links in the text + link_list = [] + + # list to keep track of the order of the subtitles + subtitle_order = [] + + # variable to keep track of how many if-statements deep the current line is + in_if_statement = 0 + + # variable to indicate that previous section was one with if-statements + previous_contained_if = False + + # variable to indicate that the previous line was part of a list + in_list = False + + # paragraph number to add to title + paragraph_number = 1 + + # metadata title + metadata_title = main_title + + # define metadata data if split occurs on paragraphs and last_title and title_level are known (will be replaced later on in the process) + if current_paragraph_number != -1: + last_title_level = 4 + last_dir = "PLACEHOLDER" + + # list to keep track of most recent directories on each title level + curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)] + + with open(file, 'r') as readfile: + + # Create two independent iterators from the original file iterator (needed to check for lists) + current_line, next_line = tee(readfile) + + # Advance the next_line iterator by one step, so it is always one step ahead + next(next_line, None) + + # Process the lines + for line, nxt in zip_longest(current_line, next_line, fillvalue=""): + + # detect if-statements starting or ending on the current line + in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len( + re.findall(IF_MANGLED_PATTERNS[ENDIF], line)) + + # detect whether the current line is in a list + if re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', line): # beginning of a list entry + in_list = True + if options[VERBOSE]: + print("First line of new list entry found, not starting new paragraphs: " + line[:-1]) + elif re.search(r'^\s{2,}.+$', line) and in_list: # middle of a list entry + pass + elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list: # line(s) between list entries + pass + elif 
re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', nxt): + in_list = True + elif in_list: + if options[VERBOSE]: + print("List ended, starting new paragraphs again") + in_list = False + else: + in_list = False + + # detect codeblocks to make sure titles aren't detected in them + if '```' in line or (('
' in line) ^ ('
' in line)): + in_code_block = not in_code_block + if options[VERBOSE]: + if in_code_block: + print("Detected start of a codeblock, not starting new paragraphs") + else: + print("Detected end of codeblock, starting new paragraphs again") + + # only split up if current line is in a fully non-os-specific section + if in_if_statement == 0: + + title_level = check_for_title(line, in_code_block, curr_dirs, options) + + # check whether a new paragraph should be started + if line == "\n" and paragraph_long_enough(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph), options) and not in_code_block and not in_list: + + # create a title for the previous paragraph + if current_paragraph_number == -1: + paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number) + else: + paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number) + paragraph_number += 1 + + # write text of previous file + if previous_contained_if: + paragraphs_os_text[paragraph_title] = current_paragraph + if options[VERBOSE]: + print("Saved os-specific chunk with temporary title: " + paragraph_title + "\n") + else: + paragraphs_os_free_text[paragraph_title] = current_paragraph + if options[VERBOSE]: + print("Saved generic chunk with title: " + paragraph_title + "\n") + + # write metadata of previous file + paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir, source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md') + subtitle_order.append(paragraph_title) + + # reset the current paragraph + current_paragraph = "" + + # reset link_list + link_list = [] + + previous_contained_if = False + + # line is a title with a maximum depth of 4 + elif title_level > 0: + + # make a new title + metadata_title = make_valid_title(line[title_level + 1:-1]) + + line, link_list = replace_markdown_markers(line[title_level + 1:], link_list, in_code_block, 
main_title, is_linux_tutorial) + current_paragraph += line + + # line is not a title or the beginning of a new paragraph + elif line != "\n" or previous_contained_if: + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) + current_paragraph += line + + # keep track of title level and directory to write to metadata upon discovering a new subtitle + if title_level > 0: + last_title_level = title_level + last_dir = curr_dirs[last_title_level] + else: + previous_contained_if = True + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) + current_paragraph += line + + # create a title for the last paragraph + if current_paragraph_number == -1: + paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number) + else: + paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number) + + # write dictionaries for the last file + if previous_contained_if: + paragraphs_os_text[paragraph_title] = current_paragraph + if options[VERBOSE]: + print("Saved os-specific chunk with temporary title: " + paragraph_title + "\n") + else: + paragraphs_os_free_text[paragraph_title] = current_paragraph + if options[VERBOSE]: + print("Saved generic chunk with title: " + paragraph_title + "\n") + paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level], source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md') + subtitle_order.append(paragraph_title) + + return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order + + +def paragraph_long_enough(paragraph, options): + """ + Function that checks if the paragraph is long enough to be split of + + :param paragraph: current paragraph + :param options: dictionary containing the options given by the user + :return: + """ + encoding = tiktoken.get_encoding("cl100k_base") + 
def write_metadata(main_title, subtitle, links, title_level, directory, source_file):
    """
    Build the metadata dictionary that describes one section of text.

    :param main_title: the main title of the file containing the section
    :param subtitle: the title of the section
    :param links: list of links found in the section; stored under string indices ("0", "1", ...) when not empty
    :param title_level: the depth of the title of the section
    :param directory: the directory the section will be written to; the name of its parent is stored as parent title
    :param source_file: the source file that the section originates from
    :return paragraph_metadata: dictionary containing the metadata about the section
    """

    # key order matters for the json that is dumped later on, so keep it stable
    section_info = {
        MAIN_TITLE: main_title,
        SUBTITLE: subtitle,
        SOURCE_FILE: source_file,
        TITLE_DEPTH: title_level,
        DIRECTORY: directory,
    }

    # only add the links entry when there is something to store
    if len(links) > 0:
        section_info[LINKS] = {str(position): url for position, url in enumerate(links)}

    section_info[PARENT_TITLE] = Path(directory).parent.name

    return section_info
def load_macros(name):
    """
    Load function handed to the jinja FunctionLoader to read templates from the macros folder.

    Only names containing "../<MACROS>/" are handled; the part behind that marker is looked up
    inside the macros folder (two directories up, then MKDOCS_DIR/DOCS_DIR/MACROS).

    :param name: name of the template as requested by jinja
    :return: the content of the macro file, or None when the name does not point into the macros folder
             (presumably treated by FunctionLoader as "template not found" - confirm against the jinja docs)
    """

    marker = "../" + MACROS + "/"
    macros_root = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, MACROS)

    # anything that is not a macro reference is not ours to load
    if marker not in name:
        return None

    macro_name = name.split(marker)[1]
    macro_path = os.path.join(macros_root, macro_name)

    with open(macro_path, 'r') as macro_file:
        return macro_file.read()
+ We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements. + + :param line: the current line to check for os-related if-statements + :param is_os: variable keep track of the current os-state of the if-statements. Can be NON_OS_IF, NON_OS_IF_IN_OS_IF, OS_IF or OS_IF_IN_OS_IF + NON_OS_IF: not in an os-if + NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if + OS_IF: in an os-if + OS_IF_IN_OS_IF: in an os-if nested in an os-if + :param options: dictionary containing the options given by the user + :return line: the modified line with mangled os-related if-statements + """ + + match = re.search(r'\{%(.*?)%}(.*)', line) + + start_index = 0 + added_length = 0 + + while match: + + constr_match = re.search(r'\{%.*?%}', match.string) + if_match = re.search(r'if ', match.group(1)) + if_os_match = re.search(r'if OS', match.group(1)) + endif_match = re.search(r'endif', match.group(1)) + else_match = re.search(r'else', match.group(1)) + + # mangle positions + pos_first_mangle = constr_match.start() + start_index + added_length + 1 + pos_second_mangle = constr_match.end() + start_index + added_length - 1 + + # different parts of the original string + part_before_mangling = line[:pos_first_mangle] + part_between_mangling = line[pos_first_mangle:pos_second_mangle] + part_after_mangling = line[pos_second_mangle:] + + # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these + if endif_match: + if is_os in (OS_IF, OS_IF_IN_OS_IF): + if options[VERBOSE]: + print("OS-specific endif statement found in line: " + line[:-1]) + line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling + added_length += 2 * len(IF_MANGLED_PART) + if is_os == OS_IF: + is_os = NON_OS_IF + elif is_os == 
def mangle_ifs(directory, filename, options):
    """
    Write the if-mangled version of a file to the location the jinja parser reads from.

    Every line is passed through mangle_os_ifs, carrying the if-scope state from one line to the next.

    :param directory: path of the file to be if-mangled (opened directly for reading)
    :param filename: name of the mangled file, written to <DESTINATION_DIRECTORY>/<IF_MANGLED_FILES>/
    :param options: dictionary containing the options given by the user
    :return:
    """
    mangled_path = os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES, filename)

    # scope of the most recent if-statement, starts outside of any os-specific if
    scope_state = NON_OS_IF

    with open(mangled_path, 'w') as mangled_file, open(directory, 'r') as original_file:
        for original_line in original_file:
            mangled_line, scope_state = mangle_os_ifs(original_line, scope_state, options)
            mangled_file.write(mangled_line)
def make_valid_title(title):
    """
    function that makes sure all titles can be used as valid filenames

    Steps: drop extra information between {} brackets, remove characters that are invalid in
    filenames on Windows or Linux, turn spaces into dashes, collapse every run of dashes into a
    single dash and finally strip dashes from both ends.

    Collapsing is done on whole runs (not with a single replace of "--"), so "A - B" becomes
    "A-B" instead of "A--B". Dashes are stripped after the spaces have been converted, so a
    title like "- foo" does not end up with a leading dash.

    :param title: the string that will be used as title and filename
    :return valid_filename: the adapted title that can be used as filename
    """
    # Define a regex pattern for invalid characters on both Windows and Linux
    invalid_chars = r'[<>:"/\\|?*\0]'

    # get rid of extra information between {} brackets
    title = re.sub(r'\{.*?}', '', title)

    # Remove invalid characters
    valid_filename = re.sub(invalid_chars, '', title)

    # Strip surrounding whitespace and use dashes instead of spaces
    valid_filename = valid_filename.strip().replace(' ', '-')

    # Collapse runs of dashes of any length into one dash
    valid_filename = re.sub(r'-{2,}', '-', valid_filename)

    # Dashes at the edges carry no information
    return valid_filename.strip('-')
order number of the title of the section that is being written + :param options: dictionary containing the options given by the user + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial + :return: + """ + + if len(paragraphs_text[title]) > 0: + # make the directory needed for the files that will be written + filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) + os.makedirs(filepath, exist_ok=True) + + if options[VERBOSE]: + print("Writing generic section " + title + " to filepath: " + str(filepath)) + + write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options, is_linux_tutorial) + else: + # don't write empty files + pass + + +def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, options, is_linux_tutorial): + """ + Function to write files to a certain filepath + + :param title: title of the section to be written + :param text: section of text to be written + :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text + :param title_order: list containing all subtitles in order + :param title_order_number: order number of the title of the section that is being written + :param filepath: filepath to write files to + :param OS: OS to be included in the metadata + :param options: dictionary containing the options given by the user + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial + :return: + """ + + metadata = copy.deepcopy(paragraphs_metadata[title]) + + file_title = title + + # write text file + with open(os.path.join(filepath, file_title + ".txt"), 'w') as writefile: + if LINKS in paragraphs_metadata[title].keys(): + adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS], options) + writefile.write(adapted_text) + else: + writefile.write(text) + + # 
def insert_links(text, links, options):
    """
    Function that inserts links in the plaintext or takes out the references to the links depending on the value of INCLUDE_LINKS_IN_PLAINTEXT

    Link references in the text look like LINK_MARKER + number + LINK_MARKER. All references are
    handled in a single pass with a replacement function, so the link text is inserted literally.
    (Passing the link as a replacement string to re.sub would make backslashes in a link be
    interpreted as escapes or group references.)

    :param text: The plaintext that needs to be adapted
    :param links: The links that might need to be inserted (dictionary with the link numbers as string keys)
    :param options: dictionary containing the options given by the user
    :return text: The adapted plaintext
    :return links: The links that were actually present in the text, renumbered from "0" in their original order
    :raises KeyError: if links need to be inserted and the text references a link number that is not in links
    """

    reference_pattern = LINK_MARKER + r'([0-9]*?)' + LINK_MARKER
    include_links = options[INCLUDE_LINKS_IN_PLAINTEXT]

    # numbers of the links that are referenced somewhere in the text
    present_links = set()

    def replace_reference(reference):
        link_number = reference.group(1)
        present_links.add(link_number)
        if include_links:
            return " " + links[link_number] + " "
        return ""

    text = re.sub(reference_pattern, replace_reference, text)

    # only keep the links that were referenced, renumbered without gaps
    new_links = {}
    for link_number, link in links.items():
        if link_number in present_links:
            new_links[str(len(new_links))] = link

    return text, new_links
os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, is_linux_tutorial, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) + + # prepare variables to fix metadata + total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:] + all_metadata.update(os_specific_metadata) + + # write to files + for os_i, os_subtitle in enumerate(os_subtitle_order): + # check that file actually has some content + if len(os_specific_text[os_subtitle]) > 0: + # add the links to the metadata + if LINKS in metadata.keys(): + os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS] + + # fix parent in the metadata + parent_i = 0 + parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1 + parent = os_specific_metadata[os_subtitle][MAIN_TITLE] + + while total_subtitle_order[parent_i] != os_subtitle and parent_i != len(total_subtitle_order): + if all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth: + parent = total_subtitle_order[parent_i] + parent_i += 1 + + if options[SPLIT_ON_PARAGRAPHS] and parent != os_specific_metadata[os_subtitle][MAIN_TITLE]: + os_specific_metadata[os_subtitle][PARENT_TITLE] = all_metadata[parent][SUBTITLE] + else: + os_specific_metadata[os_subtitle][PARENT_TITLE] = parent + + # fix directory in the metadata if needed + if options[DEEP_DIRECTORIES]: + if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]: + os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE]) + else: + os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE]) + + # make a directory to save the files + filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) + os.makedirs(filepath, exist_ok=True) + 
+ if options[VERBOSE]: + print("Writing os-specific section " + os_subtitle + " to filepath: " + str(filepath)) + + # write to files + write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options, is_linux_tutorial) + else: + # don't write empty files + pass + else: + # don't split empty texts + pass + + +def main(options): + """ + main function + + :param options: dictionary containing the options specified by the user to run the script: + {SOURCE_DIRECTORY: The source directory where the original files are located, + DESTINATION_DIRECTORY: The destination directory where the processed files should be written to, + SPLIT_ON_TITLES: boolean indicating whether to split on titles, + SPLIT_ON_PARAGRAPHS: boolean indicating whether to split on paragraphs (should always be the opposite of SPLIT_ON_TITLES), + MIN_PARAGRAPH_LENGTH: integer representing the minimum length of a paragraph, + MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text, + INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext, + DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not, + VERBOSE: enable or disable verbose mode} + :return: + """ + + if options[VERBOSE]: + print("Running chatbot parser with options: " + str(options)) + + if options[DEEP_DIRECTORIES] and options[VERBOSE]: + print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") + + # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS), ignore_errors=True) + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True) + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True) + + # make the necessary directories + for directory in [COPIES, PARSED_MDS, IF_MANGLED_FILES]: + directory = os.path.join(options[DESTINATION_DIRECTORY], directory) + if not os.path.exists(directory): + os.makedirs(directory) + + ################### define loop-invariant variables ################### + + # constant that keeps track of the source directory + source_directory = options[SOURCE_DIRECTORY] + + # list of all the filenames + filenames = {} + all_items = os.listdir(source_directory) + files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + for file in files: + filenames[file] = os.path.join(source_directory, file) + + # for loops over all files + for filename in filenames.keys(): + ################### define/reset loop specific variables ################### + + # boolean indicating whether the current file is part of the linux tutorial + is_linux_tutorial = bool(LINUX_TUTORIAL in filenames[filename]) + + # make a copy of the original file in order to make sure the original does not get altered + copy_file = os.path.join(options[DESTINATION_DIRECTORY], COPIES, filename) + shutil.copyfile(filenames[filename], copy_file) + + # variable that keeps track of the directories that are used to write in at different levels + root_dir_generic = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR) + root_dir_os_specific = 
os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR) + root_dir_os_specific_linux = os.path.join(root_dir_os_specific, LINUX) + root_dir_os_specific_windows = os.path.join(root_dir_os_specific, WINDOWS) + root_dir_os_specific_macos = os.path.join(root_dir_os_specific, MACOS) + + # variable for the main title (needed for reference links) + main_title = filename[:-3] + + # variable that keeps track of the directories that are used to write in at different levels + curr_dirs = [filename[:-3] for _ in range(options[MAX_TITLE_DEPTH] + 1)] + + ################### actually parse the md file ################### + + if options[VERBOSE]: + print(LINE + "Processing " + filename) + print("Location: " + filenames[filename]) + print("\nMaking directories:") + + # create directories for the source markdown file + for directory in [root_dir_generic, root_dir_os_specific, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: + if options[VERBOSE]: + print(directory) + os.makedirs(directory, exist_ok=True) + + if options[VERBOSE]: + print("\nParsing the sourcefile with jinja") + + # process the jinja macros + jinja_parser(filename, copy_file, options) + + if options[VERBOSE]: + print("\nSplitting the file for the first time (split in sufficiently small generic sections and large os-specific chunks)") + + # split the text in paragraphs + paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options, is_linux_tutorial) + + if options[VERBOSE]: + print("\nFurther splitting os-specific chunks and writing generic and os-specific sections to files with metadata") + + # for every section, either make the whole section generic, or create an os-specific file for 
each OS + for i, subtitle in enumerate(subtitle_order): + + # generic + if subtitle in paragraphs_os_free_text.keys(): + write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, options, is_linux_tutorial) + + # os-specific + else: + split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata, options, is_linux_tutorial) + + if options[VERBOSE]: + print("\nFinished processing " + filename) + + if options[VERBOSE]: + print(LINE + "Cleaning up directories:") + print(os.path.join(options[DESTINATION_DIRECTORY], COPIES)) + print(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES)) + print(os.path.join(options[DESTINATION_DIRECTORY], LINUX_TUTORIAL)) + # clean up temporary directories and files + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True) + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True) + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], LINUX_TUTORIAL), ignore_errors=True) + if os.path.exists(TEMP_JINJA_FILE): + os.remove(TEMP_JINJA_FILE) + + if options[VERBOSE]: + print("Parsing finished successfully") + + +################### run the script ################### +if __name__ == '__main__': + parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n") + + # adding command-line options + parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located") + parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to") + parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.") + parser.add_argument("-pl", "--min_paragraph_length", type=int, default=512, 
help="Minimum length in characters of a paragraph, only works if split on titles is disabled (default: 512)") + parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)") + parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") + parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is enabled") + parser.add_argument("-v", "--verbose", action="store_true", help="Run the script with verbose output") + + args = parser.parse_args() + + options_dict = {SOURCE_DIRECTORY: args.source, + DESTINATION_DIRECTORY: args.destination, + SPLIT_ON_TITLES: args.split_on_titles, + SPLIT_ON_PARAGRAPHS: not args.split_on_titles, + MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, + MAX_TITLE_DEPTH: args.max_title_depth, + INCLUDE_LINKS_IN_PLAINTEXT: args.links, + DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles, + VERBOSE: args.verbose} + + main(options_dict) diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt new file mode 100644 index 00000000000..37137582aad --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/requirements.txt @@ -0,0 +1,4 @@ +PyYAML==6.0.2 +Jinja2==3.1.4 +tiktoken~=0.7.0 +pathlib~=1.0.1 \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt new file mode 100644 index 00000000000..94270ff37e3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt @@ -0,0 +1,6 @@ +Main title +This is the first paragraph of text.
It is non-os-specific, however it does contain a link. +It also contains some other Markdown syntax and an +example code block. +This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum +character limit for a section). diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json new file mode 100644 index 00000000000..08c0b4e4973 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "tps1", + "subtitle": "Main-title", + "source_file": "tests/test_files/ftps/tps1.md", + "title_depth": 1, + "directory": "tps1", + "links": { + "0": "https://docs.hpc.ugent.be/generic" + }, + "parent_title": "", + "previous_title": null, + "next_title": "tps1_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/tps1/#main-title" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt new file mode 100644 index 00000000000..58eedc06aa0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt @@ -0,0 +1,3 @@ +Conclusion +Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I +might add to this if needed. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json new file mode 100644 index 00000000000..2f1ea4dcd1f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "tps1", + "subtitle": "Conclusion", + "source_file": "tests/test_files/ftps/tps1.md", + "title_depth": 2, + "directory": "tps1", + "parent_title": "", + "previous_title": "tps1_paragraph_2", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/tps1/#conclusion" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt new file mode 100644 index 00000000000..d0ee9ce8256 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt @@ -0,0 +1,4 @@ +OS specific sections +This is the second section, it is the start of some +text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will +still add a link. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json new file mode 100644 index 00000000000..208cb3472f4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "tps1", + "subtitle": "OS-specific-sections", + "source_file": "tests/test_files/ftps/tps1.md", + "title_depth": 2, + "directory": "tps1", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/linuxmacos" + }, + "previous_title": "tps1_paragraph_1", + "next_title": "tps1_linux_paragraph_2.2", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#os-specific-sections" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt new file mode 100644 index 00000000000..1a3867e69fa --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt @@ -0,0 +1,3 @@ +Non Windows section +Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise +section that ends right here. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json new file mode 100644 index 00000000000..b975dfe4e03 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "tps1", + "subtitle": "Non-Windows-section", + "source_file": "tests/test_files/ftps/tps1.md", + "title_depth": 3, + "directory": "tps1", + "parent_title": "OS-specific-sections", + "previous_title": "tps1_linux_paragraph_2.1", + "next_title": "tps1_paragraph_3", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#non-windows-section" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt new file mode 100644 index 00000000000..e0642d6ac96 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt @@ -0,0 +1,4 @@ +OS specific sections +This is the second section, it is the start of some +text specific to OSes that aren't "windows". I feel like there is no need to make this section very long, however I will +still add a link. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json new file mode 100644 index 00000000000..9c605eb9004 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "tps1", + "subtitle": "OS-specific-sections", + "source_file": "tests/test_files/ftps/tps1.md", + "title_depth": 2, + "directory": "tps1", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/linuxmacos" + }, + "previous_title": "tps1_paragraph_1", + "next_title": "tps1_macos_paragraph_2.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#os-specific-sections" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt new file mode 100644 index 00000000000..1a3867e69fa --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt @@ -0,0 +1,3 @@ +Non Windows section +Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise +section that ends right here. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json new file mode 100644 index 00000000000..e3ca81d7cc5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "tps1", + "subtitle": "Non-Windows-section", + "source_file": "tests/test_files/ftps/tps1.md", + "title_depth": 3, + "directory": "tps1", + "parent_title": "OS-specific-sections", + "previous_title": "tps1_macos_paragraph_2.1", + "next_title": "tps1_paragraph_3", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#non-windows-section" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt new file mode 100644 index 00000000000..9a9cbe1f3d2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt @@ -0,0 +1,7 @@ +OS specific sections +This is the second section, it is the start of some text specific to windows. +In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer +to make sure we get a long section that is over the minimum required length for the next newline character to be +classified as the end of this section. I am doing this because for the next sections I want to test whether they will be +grouped together if they are not long enough to reach the minimum paragraph length on their own. 
Also, before I forget, +let's add a link in this section as well. diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json new file mode 100644 index 00000000000..ab58c622b8c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "tps1", + "subtitle": "OS-specific-sections", + "source_file": "tests/test_files/ftps/tps1.md", + "title_depth": 2, + "directory": "tps1", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/windows" + }, + "previous_title": "tps1_paragraph_1", + "next_title": "tps1_windows_paragraph_2.2", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#os-specific-sections" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt new file mode 100644 index 00000000000..6b57235f68f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt @@ -0,0 +1,6 @@ +Windows specific section +Like this. +And this. +And also this. +These section should all be grouped together under the windows specific section of the output. The addition of this long +section at the end should make sure the combination of sections comes to an end here. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json new file mode 100644 index 00000000000..435c9e9c484 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "tps1", + "subtitle": "Windows-specific-section", + "source_file": "tests/test_files/ftps/tps1.md", + "title_depth": 3, + "directory": "tps1", + "parent_title": "OS-specific-sections", + "previous_title": "tps1_windows_paragraph_2.1", + "next_title": "tps1_paragraph_3", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#windows-specific-section" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md new file mode 100644 index 00000000000..d9b10d0c524 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md @@ -0,0 +1,43 @@ +# Main title + +This is the first paragraph of text. It is non-os-specific, however it does contain [a link](generic.md). +It also contains some `other` *Markdown* _syntax_ and an +```shell +example code block. +``` +This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum +character limit for a section). + +## OS specific sections + +This is the second section, it is the start of some {% if OS == windows %} text specific to windows. 
+In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer +to make sure we get a long section that is over the minimum required length for the next newline character to be +classified as the end of this section. I am doing this because for the next sections I want to test whether they will be +grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, +let's add [a link](windows.md) in this section as well. + +### Windows specific section + +Like this. + +And this. + +And also this. + +These section should all be grouped together under the windows specific section of the output. The addition of this long +section at the end should make sure the combination of sections comes to an end here. +{% else %} +text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will +still add [a link](linuxmacos.md). + +### Non Windows section + +Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise +section that ends right here. +{% endif %} + +## Conclusion + +Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I +might add to this if needed. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt new file mode 100644 index 00000000000..f62a4f31fee --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt @@ -0,0 +1,2 @@ +blablabla +blablablabla diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json new file mode 100644 index 00000000000..b7786c066a7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-1", + "source_file": "tests/test_files/ftts/tts1.md", + "title_depth": 2, + "directory": "tts1\\Main-title\\Subtitle-1", + "parent_title": "Main-title", + "previous_title": "Main-title", + "next_title": "Subtitle-2-g", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/tts1/#subtitle-1" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt new file mode 100644 index 00000000000..bdf68551202 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt @@ -0,0 +1 @@ +blablabla \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json new file mode 100644 index 00000000000..eb5403804e2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-5-g", + "source_file": "tests/test_files/ftts/tts1.md", + "title_depth": 2, + "directory": "tts1\\Main-title\\Subtitle-5-g", + "parent_title": "Main-title", + "previous_title": "Subtitle-2-g", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/tts1/#subtitle-5-g" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt new file mode 100644 index 00000000000..48125d91679 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt @@ -0,0 +1,4 @@ +blablabla generic +blablabla generic +blablabla Linux macOS +blablablabla Linux macOS with a link diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json new file mode 100644 index 00000000000..f7330bec86d --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-2-g", + "source_file": "tests/test_files/ftts/tts1.md", + "title_depth": 2, + "directory": "tts1\\Main-title\\Subtitle-2-g", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/linuxmacos" + }, + "previous_title": "Subtitle-1", + "next_title": "Subtitle-4-l&m", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/tts1/#subtitle-2-g" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt new file mode 100644 index 00000000000..b221f26074b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt @@ -0,0 +1,3 @@ +blablabla Linux macOS +blablablabla Linux macOS +blablabla generic with a link \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json new file mode 100644 index 00000000000..a76f852c874 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-4-l&m", + "source_file": 
"tests/test_files/ftts/tts1.md", + "title_depth": 3, + "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m", + "parent_title": "Subtitle-2-g", + "links": { + "0": "https://docs.hpc.ugent.be/generic" + }, + "previous_title": "Subtitle-2-g", + "next_title": "Subtitle-5-g", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/tts1/#subtitle-4-lm" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt new file mode 100644 index 00000000000..48125d91679 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt @@ -0,0 +1,4 @@ +blablabla generic +blablabla generic +blablabla Linux macOS +blablablabla Linux macOS with a link diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json new file mode 100644 index 00000000000..8b234c92fa6 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-2-g", + "source_file": "tests/test_files/ftts/tts1.md", + "title_depth": 2, + "directory": "tts1\\Main-title\\Subtitle-2-g", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/linuxmacos" + }, + "previous_title": "Subtitle-1", + "next_title": "Subtitle-4-l&m", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/tts1/#subtitle-2-g" +} \ No 
newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt new file mode 100644 index 00000000000..b221f26074b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt @@ -0,0 +1,3 @@ +blablabla Linux macOS +blablablabla Linux macOS +blablabla generic with a link \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json new file mode 100644 index 00000000000..732d309da81 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-4-l&m", + "source_file": "tests/test_files/ftts/tts1.md", + "title_depth": 3, + "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m", + "parent_title": "Subtitle-2-g", + "links": { + "0": "https://docs.hpc.ugent.be/generic" + }, + "previous_title": "Subtitle-2-g", + "next_title": "Subtitle-5-g", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/tts1/#subtitle-4-lm" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt new file mode 100644 index 00000000000..f9f20592832 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt @@ -0,0 +1,4 @@ +blablabla generic +blablabla generic +blablabla windows +blablabla windows with a link diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json new file mode 100644 index 00000000000..7a43426a85f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-2-g", + "source_file": "tests/test_files/ftts/tts1.md", + "title_depth": 2, + "directory": "tts1\\Main-title\\Subtitle-2-g", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/windows" + }, + "previous_title": "Subtitle-1", + "next_title": "Subtitle-3-w", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/tts1/#subtitle-2-g" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt new file mode 100644 index 00000000000..0b587cef85a --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt @@ -0,0 +1,3 @@ +blablabla windows +blablablabla windows +blablabla generic with a link \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json new file mode 100644 index 00000000000..4d7f494320d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-3-w", + "source_file": "tests/test_files/ftts/tts1.md", + "title_depth": 3, + "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-3-w", + "parent_title": "Subtitle-2-g", + "links": { + "0": "https://docs.hpc.ugent.be/generic" + }, + "previous_title": "Subtitle-2-g", + "next_title": "Subtitle-5-g", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/tts1/#subtitle-3-w" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md new file mode 100644 index 00000000000..2f3ad7f9c08 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md @@ -0,0 +1,31 @@ +# Main title + +## Subtitle 1 + +blablabla +blablablabla + +## Subtitle 2 g + +blablabla generic +blablabla generic +{% if OS == windows %}blablabla windows +blablabla windows with a [link](windows.md) + +### Subtitle 3 w + +blablabla windows +blablablabla windows +{% else %}blablabla Linux macOS +blablablabla Linux macOS with a [link](linuxmacos.md) + +### 
Subtitle 4 l&m + +blablabla Linux macOS +blablablabla Linux macOS +{% endif %} +blablabla generic with a [link](generic.md) + +## Subtitle 5 g + +blablabla diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md new file mode 100644 index 00000000000..6a74b3c0181 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md @@ -0,0 +1,4 @@ +test1: OS_IF +{% if OS == windows %} +test1 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md new file mode 100644 index 00000000000..2f9cdc38294 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md @@ -0,0 +1,4 @@ +test1: OS_IF +{-if-% if OS == windows %-if-} +test1 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md new file mode 100644 index 00000000000..360a4a59ba3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md @@ -0,0 +1,7 @@ +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{% if OS == windows %} +test2 +{% endif %} +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md new file mode 100644 index 00000000000..798dcf6db24 --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md @@ -0,0 +1,7 @@ +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{-if-% if OS == windows %-if-} +test2 +{-if-% endif %-if-} +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md new file mode 100644 index 00000000000..d93125a5971 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md @@ -0,0 +1,6 @@ +test3: OS_IF with else +{% if OS == linux %} +test3 +{% else %} +test3 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md new file mode 100644 index 00000000000..02141961338 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md @@ -0,0 +1,6 @@ +test3: OS_IF with else +{-if-% if OS == linux %-if-} +test3 +{-if-% else %-if-} +test3 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md new file mode 100644 index 00000000000..cc15fae1df1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md @@ -0,0 +1,4 @@ +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md new file mode 
100644 index 00000000000..cc15fae1df1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md @@ -0,0 +1,4 @@ +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md new file mode 100644 index 00000000000..bdb288474e2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md @@ -0,0 +1,11 @@ +test5: OS_IF in OS_IF +{% if OS == windows %} +test5 +{% else %} +{% if OS == linux %} +test5 +{% else %} +test5 +{% endif %} +test5 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md new file mode 100644 index 00000000000..10443eb67a4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md @@ -0,0 +1,11 @@ +test5: OS_IF in OS_IF +{-if-% if OS == windows %-if-} +test5 +{-if-% else %-if-} +{-if-% if OS == linux %-if-} +test5 +{-if-% else %-if-} +test5 +{-if-% endif %-if-} +test5 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md new file mode 100644 index 00000000000..0731ee3588c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md @@ -0,0 +1,8 @@ +test6: NON_OS_IF in OS_IF +{% if OS == macos %} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{% endif %} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md new file mode 100644 index 00000000000..cd37117cb00 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md @@ -0,0 +1,8 @@ +test6: NON_OS_IF in OS_IF +{-if-% if OS == macos %-if-} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md new file mode 100644 index 00000000000..6a72a338527 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md @@ -0,0 +1,9 @@ +test7: weird spacing and dashes + {%if OS == windows %} + test7 +{%- else%} + test7 + {% if OS == linux%} +test7 + {%-endif %} +{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md new file mode 100644 index 00000000000..dfe342ebfb1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md @@ -0,0 +1,9 @@ +test7: weird spacing and dashes + {-if-%if OS == windows %-if-} + test7 +{-if-%- else%-if-} + test7 + {-if-% if OS == linux%-if-} +test7 + {-if-%-endif %-if-} +{-if-%endif%-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md new file mode 100644 index 00000000000..fb8c1f8b539 --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md @@ -0,0 +1,55 @@ +test1: OS_IF +{% if OS == windows %} +test1 +{% endif %} + +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{% if OS == windows %} +test2 +{% endif %} +{% endif %} + +test3: OS_IF with else +{% if OS == linux %} +test3 +{% else %} +test3 +{% endif %} + +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } + +test5: OS_IF in OS_IF +{% if OS == windows %} +test5 +{% else %} +{% if OS == linux %} +test5 +{% else %} +test5 +{% endif %} +test5 +{% endif %} + +test6: NON_OS_IF in OS_IF +{% if OS == macos %} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{% endif %} + +test7: weird spacing and dashes + {%if OS == windows %} + test7 +{%- else%} + test7 + {% if OS == linux%} +test7 + {%-endif %} +{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md new file mode 100644 index 00000000000..796e94348fa --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md @@ -0,0 +1,55 @@ +test1: OS_IF +{-if-% if OS == windows %-if-} +test1 +{-if-% endif %-if-} + +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{-if-% if OS == windows %-if-} +test2 +{-if-% endif %-if-} +{% endif %} + +test3: OS_IF with else +{-if-% if OS == linux %-if-} +test3 +{-if-% else %-if-} +test3 +{-if-% endif %-if-} + +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } + +test5: OS_IF in OS_IF +{-if-% if OS == windows %-if-} +test5 +{-if-% else %-if-} +{-if-% if OS == linux %-if-} +test5 +{-if-% else %-if-} +test5 +{-if-% endif %-if-} +test5 +{-if-% endif %-if-} + +test6: NON_OS_IF in OS_IF +{-if-% if OS == macos %-if-} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{-if-% endif %-if-} + +test7: 
weird spacing and dashes + {-if-%if OS == windows %-if-} + test7 +{-if-%- else%-if-} + test7 + {-if-% if OS == linux%-if-} +test7 + {-if-%-endif %-if-} +{-if-%endif%-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md new file mode 100644 index 00000000000..1e18a1495d5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md @@ -0,0 +1,15 @@ +# Title + +Some explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list. + +1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list. + +2. Second entry + +3. Third entry + + ![image](img/an_image_for_the_third_entry.png) + +4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit. + +And now the text continues like normal in a new section. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py new file mode 100644 index 00000000000..91605dec651 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py @@ -0,0 +1,68 @@ +import pytest +import os +import shutil +from chatbot_parser import main + + +@pytest.mark.parametrize("input_directory,actual_output_directory,expected_output_directory, options", [ + ("tests/test_files/ftps", "tests/test_files/ftps/actual", + "tests/test_files/ftps/output", + {"SOURCE_DIRECTORY": "tests/test_files/ftps", + "DESTINATION_DIRECTORY": "tests/test_files/ftps/actual", + "SPLIT_ON_TITLES": False, + "SPLIT_ON_PARAGRAPHS": True, + "MIN_PARAGRAPH_LENGTH": 160, + "MAX_TITLE_DEPTH": 4, + "INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": False, + "VERBOSE": False} + ), + ("tests/test_files/ftts", "tests/test_files/ftts/actual", + "tests/test_files/ftts/output", + {"SOURCE_DIRECTORY": "tests/test_files/ftts", + "DESTINATION_DIRECTORY": "tests/test_files/ftts/actual", + "SPLIT_ON_TITLES": True, + "SPLIT_ON_PARAGRAPHS": False, + "MIN_PARAGRAPH_LENGTH": 160, + "MAX_TITLE_DEPTH": 4, + "INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": True, + "VERBOSE": False} + ) +]) +def test_full_script_generated_directories(input_directory, actual_output_directory, expected_output_directory, options): + # run the script + main(options) + + # Compare directories and files + for dirpath, dirnames, filenames in os.walk(expected_output_directory): + relative_path = os.path.relpath(dirpath, expected_output_directory) + actual_dir = os.path.join(actual_output_directory, relative_path) + + # Check if the directory exists + assert os.path.isdir(actual_dir), f"Directory '{actual_dir}' is missing." 
+ + # Check for files + for filename in filenames: + ref_file = os.path.join(dirpath, filename) + gen_file = os.path.join(actual_dir, filename) + + # Check if the file exists + assert os.path.isfile(gen_file), f"File '{gen_file}' is missing." + + # Check file content + with open(ref_file, 'r') as ref_f, open(gen_file, 'r') as gen_f: + ref_content = ref_f.read().strip() + gen_content = gen_f.read().strip() + assert ref_content == gen_content, f"Content of file '{gen_file}' does not match." + + # check that not too many directories have been generated + for dirpath, dirnames, filenames in os.walk(actual_output_directory): + relative_path = os.path.relpath(dirpath, actual_output_directory) + expected_dir = os.path.join(expected_output_directory, relative_path) + + # Check if the directory exists + assert os.path.isdir(expected_dir), f"Directory '{relative_path}' was made, but shouldn't have been." + + # remove directory + shutil.rmtree(actual_output_directory, ignore_errors=True) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py new file mode 100644 index 00000000000..4d0dd876103 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py @@ -0,0 +1,32 @@ +import pytest +import os +import shutil +from chatbot_parser import mangle_ifs + + +@pytest.mark.parametrize("input_file,output_file", [ + ("if_mangler_1_input.md", "if_mangler_1_output.md"), + ("if_mangler_2_input.md", "if_mangler_2_output.md"), + ("if_mangler_3_input.md", "if_mangler_3_output.md"), + ("if_mangler_4_input.md", "if_mangler_4_output.md"), + ("if_mangler_5_input.md", "if_mangler_5_output.md"), + ("if_mangler_6_input.md", "if_mangler_6_output.md"), + ("if_mangler_7_input.md", "if_mangler_7_output.md") +]) +def test_if_mangler(input_file, output_file): + # make directory + os.makedirs(os.path.join("if_mangled_files"), exist_ok=True) + + # make filepaths + input_file_path = os.path.join("tests", 
"test_files", "if_mangler_test_files", input_file) + expected_output_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", output_file) + actual_output_file_path = os.path.join("if_mangled_files", input_file) + mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.'}) + + # check every line + with open(expected_output_file_path, "r") as expected_read_file: + with open(actual_output_file_path, "r") as actual_read_file: + assert all([expected_line == actual_line for expected_line, actual_line in zip(expected_read_file, actual_read_file)]) + + # remove directory + shutil.rmtree("if_mangled_files", ignore_errors=True) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py b/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py new file mode 100644 index 00000000000..9109f2518ad --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py @@ -0,0 +1,31 @@ +import pytest +from chatbot_parser import insert_links + +options_include = {"INCLUDE_LINKS_IN_PLAINTEXT": True} +options_leave_out = {"INCLUDE_LINKS_IN_PLAINTEXT": False} +links_input = {"0": "https://first_link.com", "1": "https://second_link.be", "2": "https://docs.hpc.ugent.be/account#welcome-e-mail", "3": "https://final-link.org"} + + +@pytest.mark.parametrize("text_input, options_input, text_output, new_links", [ + # Text without links + # don't include links + ("Text without links\nand with two lines.", options_leave_out, "Text without links\nand with two lines.", {}), + # include links + ("Text without links\nand with two lines.", options_include, "Text without links\nand with two lines.", {}), + # Text with all links + # don't include links + ("Text with all the links\nand with multiple lines.\nĀ§linkĀ§linkĀ§0Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§2Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§3Ā§linkĀ§linkĀ§", options_leave_out, + "Text with all the links\nand with multiple lines.\n\n\n\n", links_input), + # include links + 
("Text with all the links\nand with multiple lines.\nĀ§linkĀ§linkĀ§0Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§2Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§3Ā§linkĀ§linkĀ§", options_include, + "Text with all the links\nand with multiple lines.\n https://first_link.com \n https://second_link.be \n https://docs.hpc.ugent.be/account#welcome-e-mail \n https://final-link.org ", links_input), + # Text with some links + # don't include links + ("Text with all the links\nand with multiple lines.\nĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§3Ā§linkĀ§linkĀ§", options_leave_out, + "Text with all the links\nand with multiple lines.\n\n", {"0": "https://second_link.be", "1": "https://final-link.org"}), + # include links + ("Text with all the links\nand with multiple lines.\nĀ§linkĀ§linkĀ§0Ā§linkĀ§linkĀ§\nĀ§linkĀ§linkĀ§2Ā§linkĀ§linkĀ§", options_include, + "Text with all the links\nand with multiple lines.\n https://first_link.com \n https://docs.hpc.ugent.be/account#welcome-e-mail ", {"0": "https://first_link.com", "1": "https://docs.hpc.ugent.be/account#welcome-e-mail"}) +]) +def test_insert_links(text_input, options_input, text_output, new_links): + assert insert_links(text_input, links_input, options_input) == (text_output, new_links) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_links.py b/scripts/HPC_chatbot_preprocessor/tests/test_links.py new file mode 100644 index 00000000000..d1acca1d740 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_links.py @@ -0,0 +1,69 @@ +import os +import pytest +from urllib import request +from chatbot_parser import main +import json + +whitelist = ["mailto:hpc@ugent.be"] +slow_list = ["https://login.hpc.ugent.be", "https://www.edx.org/course/introduction-linux-linuxfoundationx-lfs101x-0"] + +options_general = {"SOURCE_DIRECTORY": "../../mkdocs/docs/HPC", + "DESTINATION_DIRECTORY": ".", + "SPLIT_ON_TITLES": False, + "SPLIT_ON_PARAGRAPHS": True, + "MIN_PARAGRAPH_LENGTH": 683, + "MAX_TITLE_DEPTH": 4, + 
"INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": False, + "VERBOSE": False} +options_os_specific = {"SOURCE_DIRECTORY": "../../mkdocs/docs/HPC/linux-tutorial", + "DESTINATION_DIRECTORY": "./linux-tutorial", + "SPLIT_ON_TITLES": False, + "SPLIT_ON_PARAGRAPHS": True, + "MIN_PARAGRAPH_LENGTH": 683, + "MAX_TITLE_DEPTH": 4, + "INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": False, + "VERBOSE": False} + + +@pytest.mark.parametrize("options", [options_general, options_os_specific]) +def test_all_links(options): + all_links = {} + main(options) + broken_links = {} + empty_links = {} + + for (dirpath, dirnames, filenames) in os.walk(os.path.join(options['DESTINATION_DIRECTORY'], 'parsed_mds')): + for filename in filenames: + all_links[filename] = [] + if filename.endswith('metadata.json'): + data = json.load(open(os.path.join(dirpath, filename))) + if 'links' in data.keys(): + for key in data['links'].keys(): + all_links[filename].append(data['links'][key]) + all_links[filename].append(data['reference_link'].split("#")[0]) + + for filename in all_links.keys(): + all_links[filename] = list(set(all_links[filename])) + for link in all_links[filename]: + if len(link) != 0: + try: + if link not in whitelist and link not in slow_list: + with request.urlopen(link) as res: + if res.status == 200: + pass + except: + print("Broken link in " + filename + ": " + link) + if filename in broken_links.keys(): + broken_links[filename].append(link) + else: + broken_links[filename] = [link] + else: + print("Empty link in " + filename) + if filename in empty_links.keys(): + empty_links[filename].append(link) + else: + empty_links[filename] = [link] + assert len(empty_links.keys()) == 0 + assert len(broken_links.keys()) == 0 diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py new file mode 100644 index 00000000000..06e56a5cb2c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py @@ -0,0 
+1,27 @@ +import pytest +from chatbot_parser import split_on_paragraphs + + +@pytest.mark.parametrize("file, main_title, options, is_linux_tutorial, expected_text", [ + ("./test_files/list_file/list_test.md", + "list_test.md", + { + "SOURCE_DIRECTORY": "./test_files/list_file", + "DESTINATION_DIRECTORY": "./test_files/list_file", + "SPLIT_ON_TITLES": False, + "SPLIT_ON_PARAGRAPHS": True, + "MIN_PARAGRAPH_LENGTH": 100, + "MAX_TITLE_DEPTH": 4, + "INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": False, + "VERBOSE": False + }, + False, + { + 'list_test.md_paragraph_1': "Title\nSome explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.\n1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list.\n2. Second entry\n3. Third entry\n4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.\n", + 'list_test.md_paragraph_2': 'And now the text continues like normal in a new section.' 
+ } + ) +]) +def test_links(file, main_title, options, is_linux_tutorial, expected_text): + assert split_on_paragraphs(file, main_title, options, is_linux_tutorial)[1] == expected_text diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py new file mode 100644 index 00000000000..225c368477d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py @@ -0,0 +1,14 @@ +import pytest +from chatbot_parser import make_valid_title + + +@pytest.mark.parametrize("input_string,expected", [ + ("", ""), + ("A-good-filename-with-dashes", "A-good-filename-with-dashes"), + (" A very good filename beginning and ending in a space ", "A-very-good-filename-beginning-and-ending-in-a-space"), + ("-A-very-good-filename-beginning-and-ending-in-a-dash-", "A-very-good-filename-beginning-and-ending-in-a-dash"), + ("A filename containing bad characters <>:\"/\\|?*\0", "A-filename-containing-bad-characters"), + ("A filename ending with {some jinja garbage}", "A-filename-ending-with") +]) +def test_make_valid_title(input_string, expected): + assert make_valid_title(input_string) == expected diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py new file mode 100644 index 00000000000..f4cee6dd75c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py @@ -0,0 +1,46 @@ +import pytest +from chatbot_parser import replace_markdown_markers + + +@pytest.mark.parametrize("input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist", [ + # baseline test + ("A normal line with nothing special", [], False, "", "A normal line with nothing special", []), + # image 1 + ("![image](a-nice-image.png)", [], False, "", "", []), + # image 2 + ("![](img/Look-at-this-photograph.png)", [], False, "", "", []), + # link 1 (outside docs) + ("A 
line with a [link](a-nice-link.com)", ["another-link.be"], False, "", + "A line with a linkĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["another-link.be", "a-nice-link.com"]), + # link 2 (another document within the docs) + ("A line with a [link to the docs](account.md#welcome-e-mail)", ["another-link.be"], False, "", + "A line with a link to the docsĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]), + # link 3 (the same document) + ("A line with a [link to the same doc](#welcome-e-mail)", ["another-link.be"], False, "account.md", + "A line with a link to the same docĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]), + # codeblock + ("```shell", [], True, "", "", []), + # html syntax 1 (normal syntax) + ("A line with something in Bold", [], False, "", "A line with something in Bold", []), + # html syntax 2 (link) + ("A line with another link
", ["other-website.com"], False, "", + "A line with another linkĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["other-website.com", "website.com"]), + # html syntax 3 (style) + ("

A line with style

", [], False, "", "A line with style", []), + # Bot comment + ("", [], False, "", "Something about the following table", []), + # non-Bot comment + ("", [], False, "", "", []), + # something else with <> + ("A line with an example where you should put ", [], False, "", "A line with an example where you should put ", []), + # info/tips/warnings + ("!!! warning", [], False, "", " warning", []), + # collapsable admonitions + ("??? note", [], False, "", " note", []), + # Markdown syntax 1 (not in code block) + ("`Line` **with** ++a++ _lot_ *of* _++markdown++_ `syntax`", [], False, "", "Line with a lot of markdown syntax", []), + # Markdown syntax 2 (in code block) + ("`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", [], True, "", "`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", []) +]) +def test_replace_markdown_markers(input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist): + assert replace_markdown_markers(input_line, input_linklist, in_code_block, main_title) == (expected_line, expected_linklist) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py new file mode 100644 index 00000000000..6c30fef7985 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py @@ -0,0 +1,15 @@ +import pytest +import os +from chatbot_parser import write_metadata + + +@pytest.mark.parametrize("main_title,subtitle,links,title_level,directory,source_file,output", [ + ("", "", [], 1, "", "", {"source_file": "", "main_title": "", "subtitle": "", "title_depth": 1, "directory": "", "parent_title": ""}), + ("A_very_good_main_title", "An_extremely_good_subtitle", ["the_first.link", "the_second.link"], 2, + os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), "source", + {"source_file": "source", "main_title": "A_very_good_main_title", "subtitle": "An_extremely_good_subtitle", 
"title_depth": 2, + "directory": os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), + "parent_title": "An_awesome_parent_file", "links": {"0": "the_first.link", "1": "the_second.link"}}) +]) +def test_write_metadata(main_title, subtitle, links, title_level, directory, source_file, output): + assert write_metadata(main_title, subtitle, links, title_level, directory, source_file) == output From d96aeba8a42caea9ec76b5d4a574dade095fff84 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 15:14:34 +0200 Subject: [PATCH 146/152] change filenames --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 24e0b287a0a..152c3b45988 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -536,9 +536,9 @@ def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_pa # create a title for the previous paragraph if current_paragraph_number == -1: - paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number) + paragraph_title = main_title + _PARAGRAPH_ + f"{paragraph_number:03}" else: - paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' 
+ str(paragraph_number) + paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + f"{current_paragraph_number:03}.{paragraph_number:03}" paragraph_number += 1 # write text of previous file @@ -588,9 +588,9 @@ def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_pa # create a title for the last paragraph if current_paragraph_number == -1: - paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number) + paragraph_title = main_title + _PARAGRAPH_ + f"{paragraph_number:03}" else: - paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number) + paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + f"{current_paragraph_number:03}.{paragraph_number:03}" # write dictionaries for the last file if previous_contained_if: From bec9a63b71bb71d566b6d32c5a8a827e88eb098b Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 15:33:09 +0200 Subject: [PATCH 147/152] Updated all tests to incorporate the latest changes --- .../chatbot_parser.py | 2 +- ...paragraph_1.txt => tps1_paragraph_001.txt} | 0 ....json => tps1_paragraph_001_metadata.json} | 2 +- ...paragraph_3.txt => tps1_paragraph_003.txt} | 0 ....json => tps1_paragraph_003_metadata.json} | 2 +- ...1.txt => tps1_linux_paragraph_002.001.txt} | 0 ...ps1_linux_paragraph_002.001_metadata.json} | 4 +-- ...2.txt => tps1_linux_paragraph_002.002.txt} | 0 ...ps1_linux_paragraph_002.002_metadata.json} | 4 +-- ...1.txt => tps1_macos_paragraph_002.001.txt} | 0 ...ps1_macos_paragraph_002.001_metadata.json} | 4 +-- ...2.txt => tps1_macos_paragraph_002.002.txt} | 0 ...ps1_macos_paragraph_002.002_metadata.json} | 4 +-- ...txt => tps1_windows_paragraph_002.001.txt} | 0 ...1_windows_paragraph_002.001_metadata.json} | 4 +-- ...txt => tps1_windows_paragraph_002.002.txt} | 0 ...1_windows_paragraph_002.002_metadata.json} | 4 +-- .../tests/test_full_script.py | 2 +- .../tests/test_if_mangler.py | 2 +- .../tests/test_links.py | 2 ++ .../tests/test_lists.py 
| 27 ++++++++++++++++--- .../tests/test_replace_markdown_markers.py | 16 +++++------ 22 files changed, 50 insertions(+), 29 deletions(-) rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/{tps1_paragraph_1.txt => tps1_paragraph_001.txt} (100%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/{tps1_paragraph_1_metadata.json => tps1_paragraph_001_metadata.json} (90%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/{tps1_paragraph_3.txt => tps1_paragraph_003.txt} (100%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/{tps1_paragraph_3_metadata.json => tps1_paragraph_003_metadata.json} (86%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/{tps1_linux_paragraph_2.1.txt => tps1_linux_paragraph_002.001.txt} (100%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/{tps1_linux_paragraph_2.1_metadata.json => tps1_linux_paragraph_002.001_metadata.json} (80%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/{tps1_linux_paragraph_2.2.txt => tps1_linux_paragraph_002.002.txt} (100%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/{tps1_linux_paragraph_2.2_metadata.json => tps1_linux_paragraph_002.002_metadata.json} (76%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/{tps1_macos_paragraph_2.1.txt => tps1_macos_paragraph_002.001.txt} (100%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/{tps1_macos_paragraph_2.1_metadata.json => tps1_macos_paragraph_002.001_metadata.json} (80%) rename 
scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/{tps1_macos_paragraph_2.2.txt => tps1_macos_paragraph_002.002.txt} (100%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/{tps1_macos_paragraph_2.2_metadata.json => tps1_macos_paragraph_002.002_metadata.json} (76%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/{tps1_windows_paragraph_2.1.txt => tps1_windows_paragraph_002.001.txt} (100%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/{tps1_windows_paragraph_2.1_metadata.json => tps1_windows_paragraph_002.001_metadata.json} (79%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/{tps1_windows_paragraph_2.2.txt => tps1_windows_paragraph_002.002.txt} (100%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/{tps1_windows_paragraph_2.2_metadata.json => tps1_windows_paragraph_002.002_metadata.json} (77%) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 152c3b45988..ee45d5455d8 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -165,7 +165,7 @@ def make_valid_link(link, main_title, is_linux_tutorial): link = link.replace('../', '') if link.startswith("#"): - link = DOCS_URL + '/' + linux_part + main_title + "/" + link + link = DOCS_URL + '/' + linux_part + main_title.replace(".md", "") + "/" + link elif link.endswith(".md") and ("/" not in link or "." 
not in link.split("/")[0]): link = DOCS_URL + '/' + linux_part + link.replace(".md", "") elif '.md#' in link: diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_001.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_001.txt diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_001_metadata.json similarity index 90% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_001_metadata.json index 08c0b4e4973..31cbf626d8d 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_001_metadata.json @@ -9,7 +9,7 @@ }, "parent_title": "", "previous_title": null, - "next_title": "tps1_paragraph_2", + "next_title": "tps1_paragraph_002", "OS": "generic", "reference_link": "https://docs.hpc.ugent.be/tps1/#main-title" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_003.txt similarity index 100% rename from 
scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_003.txt diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_003_metadata.json similarity index 86% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_003_metadata.json index 2f1ea4dcd1f..cc7b47a8b5a 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_003_metadata.json @@ -5,7 +5,7 @@ "title_depth": 2, "directory": "tps1", "parent_title": "", - "previous_title": "tps1_paragraph_2", + "previous_title": "tps1_paragraph_002", "next_title": null, "OS": "generic", "reference_link": "https://docs.hpc.ugent.be/tps1/#conclusion" diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.001.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.001.txt diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.001_metadata.json similarity index 80% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.001_metadata.json index 208cb3472f4..fb165c8e7fc 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.001_metadata.json @@ -8,8 +8,8 @@ "links": { "0": "https://docs.hpc.ugent.be/linuxmacos" }, - "previous_title": "tps1_paragraph_1", - "next_title": "tps1_linux_paragraph_2.2", + "previous_title": "tps1_paragraph_001", + "next_title": "tps1_linux_paragraph_002.002", "OS": "linux", "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#os-specific-sections" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.002.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.002.txt diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.002_metadata.json similarity index 76% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.002_metadata.json index b975dfe4e03..36cda85cfcc 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_002.002_metadata.json @@ -5,8 +5,8 @@ "title_depth": 3, "directory": "tps1", "parent_title": "OS-specific-sections", - "previous_title": "tps1_linux_paragraph_2.1", - "next_title": "tps1_paragraph_3", + "previous_title": "tps1_linux_paragraph_002.001", + "next_title": "tps1_paragraph_003", "OS": "linux", "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#non-windows-section" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.001.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.001.txt diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.001_metadata.json similarity index 80% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.001_metadata.json index 9c605eb9004..2de51c7c0e1 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.001_metadata.json @@ -8,8 +8,8 @@ "links": { "0": "https://docs.hpc.ugent.be/linuxmacos" }, - "previous_title": "tps1_paragraph_1", - "next_title": "tps1_macos_paragraph_2.2", + "previous_title": "tps1_paragraph_001", + "next_title": "tps1_macos_paragraph_002.002", "OS": "macos", "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#os-specific-sections" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.002.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.002.txt diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.002_metadata.json similarity index 76% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.002_metadata.json index e3ca81d7cc5..fb48000e679 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_002.002_metadata.json @@ -5,8 +5,8 @@ "title_depth": 3, "directory": "tps1", "parent_title": "OS-specific-sections", - "previous_title": "tps1_macos_paragraph_2.1", - "next_title": "tps1_paragraph_3", + "previous_title": "tps1_macos_paragraph_002.001", + "next_title": "tps1_paragraph_003", "OS": "macos", "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#non-windows-section" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.001.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.001.txt diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.001_metadata.json similarity index 79% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.001_metadata.json index ab58c622b8c..00b7fcee452 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.001_metadata.json @@ -8,8 +8,8 @@ "links": { "0": "https://docs.hpc.ugent.be/windows" }, - "previous_title": "tps1_paragraph_1", - "next_title": "tps1_windows_paragraph_2.2", + "previous_title": "tps1_paragraph_001", + "next_title": "tps1_windows_paragraph_002.002", "OS": "windows", "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#os-specific-sections" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.002.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.002.txt diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.002_metadata.json similarity index 77% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.002_metadata.json index 435c9e9c484..0e38a476d04 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_002.002_metadata.json @@ -5,8 +5,8 @@ "title_depth": 3, "directory": "tps1", "parent_title": "OS-specific-sections", - "previous_title": "tps1_windows_paragraph_2.1", - "next_title": "tps1_paragraph_3", + "previous_title": "tps1_windows_paragraph_002.001", + "next_title": "tps1_paragraph_003", "OS": "windows", "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#windows-specific-section" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py index 91605dec651..99baf41ebc0 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py @@ -11,7 +11,7 @@ "DESTINATION_DIRECTORY": "tests/test_files/ftps/actual", "SPLIT_ON_TITLES": False, "SPLIT_ON_PARAGRAPHS": True, - "MIN_PARAGRAPH_LENGTH": 160, + "MIN_PARAGRAPH_LENGTH": 50, "MAX_TITLE_DEPTH": 4, "INCLUDE_LINKS_IN_PLAINTEXT": False, "DEEP_DIRECTORIES": False, diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py 
b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py index 4d0dd876103..c2ae9fea19e 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py @@ -21,7 +21,7 @@ def test_if_mangler(input_file, output_file): input_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", input_file) expected_output_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", output_file) actual_output_file_path = os.path.join("if_mangled_files", input_file) - mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.'}) + mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.', "VERBOSE": False}) # check every line with open(expected_output_file_path, "r") as expected_read_file: diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_links.py b/scripts/HPC_chatbot_preprocessor/tests/test_links.py index d1acca1d740..a13675dd3ad 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_links.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_links.py @@ -4,6 +4,8 @@ from chatbot_parser import main import json +#################################################### IMPORTANT: This test still fails because there are some invalid links in the documentation #################################################### + whitelist = ["mailto:hpc@ugent.be"] slow_list = ["https://login.hpc.ugent.be", "https://www.edx.org/course/introduction-linux-linuxfoundationx-lfs101x-0"] diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py index 06e56a5cb2c..d8a3d630c4c 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py @@ -17,10 +17,29 @@ "VERBOSE": False }, False, - { - 'list_test.md_paragraph_1': "Title\nSome explanation about the following list that is quite long. 
This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.\n1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list.\n2. Second entry\n3. Third entry\n4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.\n", - 'list_test.md_paragraph_2': 'And now the text continues like normal in a new section.' - } + {'list_test.md_paragraph_001': 'Title\n' + 'Some explanation about the following list that ' + 'is quite long. This could be problematic since ' + 'this could mean that the explanation of the ' + 'content of the list would be part of a ' + 'different paragraph than the list.\n' + '1. First entry that is very verbose since we ' + 'want to hit the character limit for a ' + "paragraph to make sure a list can't be split " + 'in the middle. If this entry is long enough, ' + 'the character limit should make it so that any ' + 'of the following newlines can be the start of ' + "a new section if the splitter doesn't know it " + 'is in a list.\n' + '2. Second entry\n' + '3. Third entry\n' + '4. 
Fourth entry that is very verbose, so we ' + 'hit the character limit for a section split, ' + "even though it shouldn't be necessary since " + 'the explanation of the list is already well ' + 'above the character limit.\n', + 'list_test.md_paragraph_002': 'And now the text continues like normal in a ' + 'new section.'} ) ]) def test_links(file, main_title, options, is_linux_tutorial, expected_text): diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py index f4cee6dd75c..d9150290f34 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py @@ -10,14 +10,14 @@ # image 2 ("![](img/Look-at-this-photograph.png)", [], False, "", "", []), # link 1 (outside docs) - ("A line with a [link](a-nice-link.com)", ["another-link.be"], False, "", - "A line with a linkĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["another-link.be", "a-nice-link.com"]), + ("A line with a [link](https://a-nice-link.com)", ["https://another-link.be"], False, "", + "A line with a linkĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["https://another-link.be", "https://a-nice-link.com"]), # link 2 (another document within the docs) - ("A line with a [link to the docs](account.md#welcome-e-mail)", ["another-link.be"], False, "", - "A line with a link to the docsĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]), + ("A line with a [link to the docs](account.md#welcome-e-mail)", ["https://another-link.be"], False, "", + "A line with a link to the docsĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["https://another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]), # link 3 (the same document) - ("A line with a [link to the same doc](#welcome-e-mail)", ["another-link.be"], False, "account.md", - "A line with a link to the same docĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", 
["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]), + ("A line with a [link to the same doc](#welcome-e-mail)", ["https://another-link.be"], False, "account.md", + "A line with a link to the same docĀ§linkĀ§linkĀ§1Ā§linkĀ§linkĀ§", ["https://another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]), # codeblock ("```shell", [], True, "", "", []), # html syntax 1 (normal syntax) @@ -28,7 +28,7 @@ # html syntax 3 (style) ("

A line with style

", [], False, "", "A line with style", []), # Bot comment - ("", [], False, "", "Something about the following table", []), + ("", [], False, "", "Something about the following table", []), # non-Bot comment ("", [], False, "", "", []), # something else with <> @@ -43,4 +43,4 @@ ("`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", [], True, "", "`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", []) ]) def test_replace_markdown_markers(input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist): - assert replace_markdown_markers(input_line, input_linklist, in_code_block, main_title) == (expected_line, expected_linklist) + assert replace_markdown_markers(input_line, input_linklist, in_code_block, main_title, False) == (expected_line, expected_linklist) From 886da19db4cd8b6469fe04fb604f1e708035d265 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 15:50:25 +0200 Subject: [PATCH 148/152] change working directory of test lists --- scripts/HPC_chatbot_preprocessor/tests/test_lists.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py index d8a3d630c4c..56ac3348dfa 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py @@ -3,7 +3,7 @@ @pytest.mark.parametrize("file, main_title, options, is_linux_tutorial, expected_text", [ - ("./test_files/list_file/list_test.md", + ("./tests/test_files/list_file/list_test.md", "list_test.md", { "SOURCE_DIRECTORY": "./test_files/list_file", From b354bb2c363de592a4cdbcfeb82dfa9f419de32d Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 16:19:45 +0200 Subject: [PATCH 149/152] change directory in metadata to linux version --- .../generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json | 2 +- .../tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json | 2 +- 
.../tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json | 2 +- .../Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json | 2 +- .../tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json | 2 +- .../Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json | 2 +- .../tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json | 2 +- .../Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json index b7786c066a7..e481468cefe 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json @@ -3,7 +3,7 @@ "subtitle": "Subtitle-1", "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 2, - "directory": "tts1\\Main-title\\Subtitle-1", + "directory": "tts1/Main-title/Subtitle-1", "parent_title": "Main-title", "previous_title": "Main-title", "next_title": "Subtitle-2-g", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json index eb5403804e2..100766dd865 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json @@ -3,7 +3,7 @@ "subtitle": "Subtitle-5-g", 
"source_file": "tests/test_files/ftts/tts1.md", "title_depth": 2, - "directory": "tts1\\Main-title\\Subtitle-5-g", + "directory": "tts1/Main-title/Subtitle-5-g", "parent_title": "Main-title", "previous_title": "Subtitle-2-g", "next_title": null, diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json index f7330bec86d..6f42345d013 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -3,7 +3,7 @@ "subtitle": "Subtitle-2-g", "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 2, - "directory": "tts1\\Main-title\\Subtitle-2-g", + "directory": "tts1/Main-title/Subtitle-2-g", "parent_title": "Main-title", "links": { "0": "https://docs.hpc.ugent.be/linuxmacos" diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json index a76f852c874..351b6f5cca6 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json @@ -3,7 +3,7 @@ "subtitle": "Subtitle-4-l&m", "source_file": 
"tests/test_files/ftts/tts1.md", "title_depth": 3, - "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m", + "directory": "tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m", "parent_title": "Subtitle-2-g", "links": { "0": "https://docs.hpc.ugent.be/generic" diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json index 8b234c92fa6..30249d3d155 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -3,7 +3,7 @@ "subtitle": "Subtitle-2-g", "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 2, - "directory": "tts1\\Main-title\\Subtitle-2-g", + "directory": "tts1/Main-title/Subtitle-2-g", "parent_title": "Main-title", "links": { "0": "https://docs.hpc.ugent.be/linuxmacos" diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json index 732d309da81..087fe810609 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json @@ -3,7 +3,7 @@ "subtitle": "Subtitle-4-l&m", "source_file": 
"tests/test_files/ftts/tts1.md", "title_depth": 3, - "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m", + "directory": "tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m", "parent_title": "Subtitle-2-g", "links": { "0": "https://docs.hpc.ugent.be/generic" diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json index 7a43426a85f..da3c61d3edc 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -3,7 +3,7 @@ "subtitle": "Subtitle-2-g", "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 2, - "directory": "tts1\\Main-title\\Subtitle-2-g", + "directory": "tts1/Main-title/Subtitle-2-g", "parent_title": "Main-title", "links": { "0": "https://docs.hpc.ugent.be/windows" diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json index 4d7f494320d..e07586cf55e 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json @@ -3,7 +3,7 @@ "subtitle": "Subtitle-3-w", "source_file": 
"tests/test_files/ftts/tts1.md", "title_depth": 3, - "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-3-w", + "directory": "tts1/Main-title/Subtitle-2-g/Subtitle-3-w", "parent_title": "Subtitle-2-g", "links": { "0": "https://docs.hpc.ugent.be/generic" From 87e557590c8c5c5e10af46035da41d7f422e5c29 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 16:19:55 +0200 Subject: [PATCH 150/152] change directory in metadata to linux version --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index ee45d5455d8..9b6e8dc7f2d 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -634,7 +634,7 @@ def write_metadata(main_title, subtitle, links, title_level, directory, source_f :return paragraph_metadata: dictionary containing the metadata about the section """ - paragraph_metadata = {MAIN_TITLE: main_title, SUBTITLE: subtitle, SOURCE_FILE: source_file, TITLE_DEPTH: title_level, DIRECTORY: directory} + paragraph_metadata = {MAIN_TITLE: main_title, SUBTITLE: subtitle, SOURCE_FILE: source_file, TITLE_DEPTH: title_level, DIRECTORY: Path(directory).as_posix()} if len(links) > 0: paragraph_metadata[LINKS] = {} @@ -1061,6 +1061,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE]) else: os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE]) + os_specific_metadata[os_subtitle][DIRECTORY] = Path(os_specific_metadata[os_subtitle][DIRECTORY]).as_posix() # make a directory to save the files filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, OS, 
os_specific_metadata[os_subtitle][DIRECTORY]) From 8f39e08aab216886bc89d495f631510448e4ce31 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 16:21:48 +0200 Subject: [PATCH 151/152] add pytest to requirements --- scripts/HPC_chatbot_preprocessor/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt index 37137582aad..1b9cb4a5052 100644 --- a/scripts/HPC_chatbot_preprocessor/requirements.txt +++ b/scripts/HPC_chatbot_preprocessor/requirements.txt @@ -1,4 +1,5 @@ PyYAML==6.0.2 Jinja2==3.1.4 tiktoken~=0.7.0 -pathlib~=1.0.1 \ No newline at end of file +pathlib~=1.0.1 +pytest \ No newline at end of file From d6c33e75c59f46745795418da20bc242328ec77d Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 16:33:04 +0200 Subject: [PATCH 152/152] change test for write_metadata --- .../HPC_chatbot_preprocessor/tests/test_write_metadata.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py index 6c30fef7985..cb80d00975c 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py @@ -1,14 +1,15 @@ import pytest import os from chatbot_parser import write_metadata +from pathlib import Path @pytest.mark.parametrize("main_title,subtitle,links,title_level,directory,source_file,output", [ - ("", "", [], 1, "", "", {"source_file": "", "main_title": "", "subtitle": "", "title_depth": 1, "directory": "", "parent_title": ""}), + ("", "", [], 1, "", "", {"source_file": "", "main_title": "", "subtitle": "", "title_depth": 1, "directory": ".", "parent_title": ""}), ("A_very_good_main_title", "An_extremely_good_subtitle", ["the_first.link", "the_second.link"], 2, - os.path.join("A_very_good_main_title", 
"An_awesome_parent_file", "An_extremely_good_subtitle"), "source", + Path(os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle")).as_posix(), "source", {"source_file": "source", "main_title": "A_very_good_main_title", "subtitle": "An_extremely_good_subtitle", "title_depth": 2, - "directory": os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), + "directory": Path(os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle")).as_posix(), "parent_title": "An_awesome_parent_file", "links": {"0": "the_first.link", "1": "the_second.link"}}) ]) def test_write_metadata(main_title, subtitle, links, title_level, directory, source_file, output):