From 18b298668b54eaba0bce75e4be8501751e7efe4a Mon Sep 17 00:00:00 2001 From: karanj Date: Thu, 16 Feb 2023 21:45:06 +1100 Subject: [PATCH] [dictionaries] adding common Hindi tokens Reviewed the Hindi ngrams list, identified a number of common items which could be added to the model --- resources/dictionaries/hi/building_types.txt | 5 ++++ resources/dictionaries/hi/directionals.txt | 4 ++++ resources/dictionaries/hi/people.txt | 1 + resources/dictionaries/hi/personal_titles.txt | 24 +++++++++++++++++++ resources/dictionaries/hi/qualifiers.txt | 3 +++ resources/dictionaries/hi/street_types.txt | 3 ++- 6 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 resources/dictionaries/hi/building_types.txt create mode 100644 resources/dictionaries/hi/directionals.txt create mode 100644 resources/dictionaries/hi/people.txt create mode 100644 resources/dictionaries/hi/personal_titles.txt create mode 100644 resources/dictionaries/hi/qualifiers.txt diff --git a/resources/dictionaries/hi/building_types.txt b/resources/dictionaries/hi/building_types.txt new file mode 100644 index 000000000..180e36062 --- /dev/null +++ b/resources/dictionaries/hi/building_types.txt @@ -0,0 +1,5 @@ +mandir|मन्दिर|मंदिर +station +police station +post office +office \ No newline at end of file diff --git a/resources/dictionaries/hi/directionals.txt b/resources/dictionaries/hi/directionals.txt new file mode 100644 index 000000000..4dfff2006 --- /dev/null +++ b/resources/dictionaries/hi/directionals.txt @@ -0,0 +1,4 @@ +dakshin|दक्षिण +uttar|उत्तर +poorva|poorav|पूर्व +paschim|पश्चिम \ No newline at end of file diff --git a/resources/dictionaries/hi/people.txt b/resources/dictionaries/hi/people.txt new file mode 100644 index 000000000..b79d681f5 --- /dev/null +++ b/resources/dictionaries/hi/people.txt @@ -0,0 +1 @@ +mahatma gandhi|mg|m g \ No newline at end of file diff --git a/resources/dictionaries/hi/personal_titles.txt b/resources/dictionaries/hi/personal_titles.txt new file mode 100644 index 000000000..6f60d0cda --- /dev/null +++ b/resources/dictionaries/hi/personal_titles.txt @@ -0,0 +1,24 @@ +baba +babu +bhagat +guru +jagirdar +maharaja|maharaj +mahatma|महात्मा +pandit +raja +rajarshi +rajkumar +rajkumari +rani +rishi +sahib +sant +sardar +senapati +shah +shrimati|smt|srimathi|श्रीमती +shri|shree|sri|श्री +sushri +swami +ustad \ No newline at end of file diff --git a/resources/dictionaries/hi/qualifiers.txt b/resources/dictionaries/hi/qualifiers.txt new file mode 100644 index 000000000..ba5b38f7f --- /dev/null +++ b/resources/dictionaries/hi/qualifiers.txt @@ -0,0 +1,3 @@ +nagar|नगर +colony +cantonment|cantt \ No newline at end of file diff --git a/resources/dictionaries/hi/street_types.txt b/resources/dictionaries/hi/street_types.txt index fde3a96b2..b5cb55286 100644 --- a/resources/dictionaries/hi/street_types.txt +++ b/resources/dictionaries/hi/street_types.txt @@ -1,3 +1,4 @@ bazaar|bazar marg -nagar \ No newline at end of file +flyover +रोड \ No newline at end of file