Skip to content

Commit

Permalink
Remove non-unit-test-style asserts
Browse files Browse the repository at this point in the history
  • Loading branch information
bshastry committed Oct 10, 2023
1 parent 1805e72 commit fd3e451
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 19 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/bandit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
- name: Run Bandit
id: bandit
run: bandit -r . --format json -o bandit_results.json -s B101
run: bandit -r . --format json -o bandit_results.json

- name: Save Bandit Results as Artifact
uses: actions/upload-artifact@v2
Expand Down
39 changes: 21 additions & 18 deletions tests/test_text_utils.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
from text_utils.text_utils import tiktoken_len, embedding_cost, return_url_extension
import unittest
import math

# As of 2021-10-20, the cost of embedding a single token using OpenAI is $0.0000001
EMBEDDING_COST_PER_TOKEN = 0.0000001


class TestTextUtils(unittest.TestCase):
def test_tiktoken_len(self):
assert tiktoken_len("Hello, world!") == 4
assert tiktoken_len("This is a sentence.") == 5
assert tiktoken_len("This is a longer sentence with more words.") == 9
self.assertEqual(tiktoken_len("Hello, world!"), 4)
self.assertEqual(tiktoken_len("This is a sentence."), 5)
self.assertEqual(tiktoken_len("This is a longer sentence with more words."), 9)

def test_embedding_cost(self):
class Page:
Expand All @@ -25,8 +24,8 @@ def __init__(self, page_content):
num_tokens = 0
for page in document:
num_tokens += tiktoken_len(page.page_content)
assert num_tokens == 18
assert math.isclose(
self.assertEqual(num_tokens, 18)
self.assertAlmostEqual(
embedding_cost(document), num_tokens * EMBEDDING_COST_PER_TOKEN
)

Expand All @@ -38,25 +37,29 @@ def __init__(self, page_content):
]
for page in document:
num_tokens += tiktoken_len(page.page_content)
assert num_tokens == 29
assert math.isclose(
self.assertEqual(num_tokens, 29)
self.assertAlmostEqual(
embedding_cost(document), (num_tokens * EMBEDDING_COST_PER_TOKEN)
)

def test_return_url_extension(self):
assert return_url_extension("https://www.example.com/index.html") == ".html"
assert (
return_url_extension("https://www.example.com/path/to/file.txt") == ".txt"
self.assertEqual(
return_url_extension("https://www.example.com/index.html"), ".html"
)
assert (
return_url_extension("https://www.example.com/path/to/image.jpg") == ".jpg"
self.assertEqual(
return_url_extension("https://www.example.com/path/to/file.txt"), ".txt"
)
assert return_url_extension("https://www.example.com/path/to/doc.pdf") == ".pdf"
assert (
return_url_extension("https://www.example.com/path/to/README.md") == ".md"
self.assertEqual(
return_url_extension("https://www.example.com/path/to/image.jpg"), ".jpg"
)
assert (
return_url_extension("https://www.example.com/path/to/ms.docx") == ".docx"
self.assertEqual(
return_url_extension("https://www.example.com/path/to/doc.pdf"), ".pdf"
)
self.assertEqual(
return_url_extension("https://www.example.com/path/to/README.md"), ".md"
)
self.assertEqual(
return_url_extension("https://www.example.com/path/to/ms.docx"), ".docx"
)


Expand Down

0 comments on commit fd3e451

Please sign in to comment.