Skip to content

Commit

Permalink
Merge pull request #941 from spencermountain/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
spencermountain authored Jul 29, 2022
2 parents 6a58ff8 + 88fb943 commit 98282ff
Show file tree
Hide file tree
Showing 14 changed files with 47 additions and 17 deletions.
2 changes: 1 addition & 1 deletion builds/compromise.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/one/compromise-one.cjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/one/compromise-one.mjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/three/compromise-three.cjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/three/compromise-three.mjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/two/compromise-two.cjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/two/compromise-two.mjs

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ While all _Major_ releases should be reviewed, our only _large_ releases are **v

<!-- #### 14.5.0 [Unreleased]
-->
#### 14.4.2 [July 2022]
- **[fix]** - hotfix for sentence tokenization issue #935

#### 14.4.1 [July 2022]
- **[change]** - improvements to negative-optional match logic - `!foo?`
- **[change]** - support short sentences embedded in quotes+parentheses
Expand Down
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"author": "Spencer Kelly <[email protected]> (http://spencermounta.in)",
"name": "compromise",
"description": "modest natural language processing",
"version": "14.4.1",
"version": "14.4.2",
"main": "./src/three.js",
"unpkg": "./builds/compromise.js",
"type": "module",
Expand Down Expand Up @@ -114,4 +114,4 @@
"_tests/**"
],
"license": "MIT"
}
}
10 changes: 5 additions & 5 deletions scratch.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ let txt = ''
let doc
let m

// doc = nlp('Those are Great Danes')
// doc.nouns(0).toSingular()
// console.log(doc.text())

let arr = []

doc = nlp("she is cool").sentences()
doc.toFutureTense()
console.log(doc.text())
doc = nlp(`The hero was stunned by the scary monster. The glowing girl said (Hey! Leave him alone!).`)
doc.debug()
2 changes: 2 additions & 0 deletions src/1-one/tokenize/methods/01-sentences/04-quote-merge.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ const quoteMerge = function (splits) {
// look at the next sentence for a closing quote,
if (closesQuote(splits[i + 1]) && splits[i + 1].length < MAX_QUOTE) {
splits[i] += splits[i + 1]// merge them
arr.push(splits[i])
splits[i + 1] = ''
i += 1
continue
Expand All @@ -60,6 +61,7 @@ const quoteMerge = function (splits) {
//make sure it's not too-long
if (toAdd.length < MAX_QUOTE) {
splits[i] += toAdd
arr.push(splits[i])
splits[i + 1] = ''
splits[i + 2] = ''
i += 2
Expand Down
1 change: 1 addition & 0 deletions src/1-one/tokenize/methods/01-sentences/05-parens-merge.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ const mergeParens = function (splits) {
if (m2 !== null && m.length === 1 && !hasOpen.test(splits[i + 1])) {
// merge in 2nd sentence
splits[i] += splits[i + 1]
arr.push(splits[i])
splits[i + 1] = ''
i += 1
continue
Expand Down
2 changes: 1 addition & 1 deletion src/_version.js
Original file line number Diff line number Diff line change
@@ -1 +1 @@
export default '14.4.1'
export default '14.4.2'
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import test from 'tape'
import nlp from '../_lib.js'
const here = '[two/tokenize] '
import nlp from '../../two/_lib.js'
const here = '[one/sentence-split] '


test('sentence tokenizer', function (t) {
Expand All @@ -24,6 +24,9 @@ test('sentence tokenizer', function (t) {
// [`it fell out of the bag. (I wasn't fast enough.) Now it's on the floor.`, 3],
[`the scent of basil (my favorite).`, 1],
[`Your whole life (right? right?) might go smoothly this year.`, 1],
[`before. (inside word) and (inside). after`, 3],
[`before. (inside word?) and (inside!). after`, 3],
[`before. (the whole thing is inside). after`, 3],
// quotation wrapper
[`the doc said "no sir" and walked away. the end`, 2],
[`Kendal asked, “What time is it?”`, 1],
Expand All @@ -37,6 +40,9 @@ test('sentence tokenizer', function (t) {
// mis-matched examples
['i thought "no way! and he said "yes way".', 2],//
['i thought (no way! and he said (yes)', 2],//
['i thought (no way! and he said yes', 2],
['(no way! and he said yes', 2],
['"no way! and he\'s cool', 2],
]
arr.forEach(a => {
let [str, len] = a
Expand Down Expand Up @@ -83,3 +89,21 @@ test('emoji-only sentence', function (t) {
t.equal(doc.length, 2, here + 'boemojith sentence')
t.end()
})

// Checks doc.length for inputs whose quotations contain
// sentence-ending punctuation (?, !) inside the quote marks.
test('nested quotes', function (t) {
  // each row: [input text, expected doc.length, assertion label]
  const cases = [
    [`The hero was stunned by the scary monster. The glowing girl said "Hey! Leave him alone!".`, 2, 'nested quote sentence'],
    [`foo bar. Before "quote here" and "quote here".`, 2, '2 quote sentence'],
    [`foo bar. Before "quote here?" and "quote here?".`, 2, '2 quotes with sentence'],
    [`Foo bar. Before "quote here? and quote here?". After`, 3, '1 quotes with 2 sentences'],
    [`Foo bar. Before "quote here? and quote here? also here!". After`, 3, '1 quotes with 3 sentences'],
  ]
  for (const [str, want, label] of cases) {
    const doc = nlp(str)
    t.equal(doc.length, want, here + label)
  }
  t.end()
})

0 comments on commit 98282ff

Please sign in to comment.