Skip to content

Commit

Permalink
made some more PatternModelOptions available to the Python binding
Browse files Browse the repository at this point in the history
  • Loading branch information
proycon committed Apr 9, 2020
1 parent c260d65 commit 317537b
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 3 deletions.
2 changes: 1 addition & 1 deletion codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"@type": "SoftwareSourceCode",
"identifier": "colibricore",
"name": "Colibri Core",
"version": "2.5.2",
"version": "2.5.3",
"description": "Colibri core is an NLP tool as well as a C++ and Python library for working with basic linguistic constructions such as n-grams and skipgrams (i.e. patterns with one or more gaps, either of fixed or dynamic size) in a quick and memory-efficient way. ",
"license": "https://spdx.org/licenses/GPL-3.0",
"url": "https://proycon.github.io/colibri-core",
Expand Down
3 changes: 3 additions & 0 deletions colibricore_classes.in.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -315,11 +315,13 @@ cdef extern from "patternmodel.h":
cdef cppclass PatternModelOptions:
int MINTOKENS
int MINTOKENS_UNIGRAMS
int MINTOKENS_SKIPGRAMS
int MINLENGTH
int MAXLENGTH
bool DOSKIPGRAMS
bool DOSKIPGRAMS_EXHAUSTIVE
int MINSKIPTYPES
int MAXSKIPS
bool DOREVERSEINDEX
bool DEBUG
bool QUIET
Expand All @@ -328,6 +330,7 @@ cdef extern from "patternmodel.h":
bool DOREMOVESKIPGRAMS
bool DOREMOVEFLEXGRAMS
bool DORESET
int PRUNENONSUBSUMED

cdef cppclass IndexedDataHandler:
unsigned int count(IndexedData &)
Expand Down
15 changes: 15 additions & 0 deletions colibricore_wrapper.in.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -890,13 +890,16 @@ cdef class PatternModelOptions:
* DOSKIPGRAMS - Compute skipgrams?
* DOSKIPGRAMS_EXHAUSTIVE - Compute skipgrams exhaustively?
* MINSKIPTYPES - Minimum amount of different skip content types
* MAXSKIPS - The maximum amount of skips in a skipgram
* DOREVERSEINDEX - Build reverse index? (default: True)
* DOPATTERNPERLINE - Assume each line holds one single pattern.
* MINTOKENS_UNIGRAMS - Word occurrence threshold (secondary threshold): only count patterns in which the words/unigrams occur at least this many times, only effective when the primary occurrence threshold (MINTOKENS) is lower than this threshold
* MINTOKENS_SKIPGRAMS - The occurrence threshold for skipgrams, minimum amount of occurrences for a pattern to be included in a model. Defaults to the same value as MINTOKENS. Only used if DOSKIPGRAMS or DOSKIPGRAMS_EXHAUSTIVE is set to true
* DOREMOVENGRAMS - Remove n-grams from the model
* DOREMOVESKIPGRAMS - Remove skipgrams from the model
* DOREMOVEFLEXGRAMS - Remove flexgrams from the model
* DORESET - Reset all counts before training
* PRUNENONSUBSUMED - Prune all n-grams up to this length that are not subsumed by higher-order ngrams
* DEBUG
* QUIET (default: False)
Expand All @@ -923,8 +926,12 @@ cdef class PatternModelOptions:
self.coptions.DOSKIPGRAMS_EXHAUSTIVE = value
elif key == 'MINTOKENS_UNIGRAMS':
self.coptions.MINTOKENS_UNIGRAMS = value
elif key == 'MINTOKENS_SKIPGRAMS':
self.coptions.MINTOKENS_SKIPGRAMS = value
elif key == 'MINSKIPTYPES':
self.coptions.MINSKIPTYPES = value
elif key == 'MAXSKIPS':
self.coptions.MAXSKIPS = value
elif key == 'DOREVERSEINDEX':
self.coptions.DOREVERSEINDEX = value
elif key == 'DOPATTERNPERLINE':
Expand All @@ -941,6 +948,8 @@ cdef class PatternModelOptions:
self.coptions.DEBUG = value
elif key == 'QUIET':
self.coptions.QUIET = value
elif key == 'PRUNENONSUBSUMED':
self.coptions.PRUNENONSUBSUMED = value
else:
raise KeyError

Expand All @@ -957,8 +966,12 @@ cdef class PatternModelOptions:
return self.coptions.DOSKIPGRAMS_EXHAUSTIVE
elif key == 'MINTOKENS_UNIGRAMS':
return self.coptions.MINTOKENS_UNIGRAMS
elif key == 'MINTOKENS_SKIPGRAMS':
return self.coptions.MINTOKENS_SKIPGRAMS
elif key == 'MINSKIPTYPES':
return self.coptions.MINSKIPTYPES
elif key == 'MAXSKIPS':
return self.coptions.MAXSKIPS
elif key == 'DOREVERSEINDEX':
return self.coptions.DOREVERSEINDEX
elif key == 'DOPATTERNPERLINE':
Expand All @@ -975,6 +988,8 @@ cdef class PatternModelOptions:
return self.coptions.DEBUG
elif key == 'QUIET':
return self.coptions.QUIET
elif key == 'PRUNENONSUBSUMED':
return self.coptions.PRUNENONSUBSUMED
else:
raise KeyError

Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# $URL: https://ilk.uvt.nl/svn/sources/ucto/trunk/configure.ac $

AC_PREREQ([2.67])
AC_INIT([colibri-core],[2.5.2],[[email protected]])
AC_INIT([colibri-core],[2.5.3],[[email protected]])
AC_CONFIG_SRCDIR([configure.ac])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_HEADER([config.h])
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def read(fname):
license = "GPL",
keywords = "nlp computational_linguistics frequency ngram skipgram pmi cooccurrence linguistics",
long_description=read('README.rst'),
version = '2.5.2',
version = '2.5.3',
ext_modules = extensions,
cmdclass = {'build_ext': build_ext},
classifiers=[
Expand Down

0 comments on commit 317537b

Please sign in to comment.