Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ readme + arq #14

Merged
merged 4 commits into from
Jan 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,9 @@ Some skilled engineers even have a blog site where they push some gold content,

![newsletter](./assets/newsletter.png)

## Architecture


# Roadmap

This program aims to create the following features:

- Given a list of websites stored in a MongoDB collection, scrape the content of each website and save it in another MongoDB collection. ✅
- After the scraping, calculate the similarity between the new content and the previous content of each website, and update the MongoDB collection
with this information. ✅
- Every registered user will receive an email, based on the URLs they have registered, notifying them about news on their favorite engineers' websites. ✅

Note: All these flows will be triggered by a cron job. ✅

![architecture](./assets/newsletterarq.png)

## Environment Variables

Expand Down Expand Up @@ -49,4 +39,14 @@ Access the dev container and run the tests:
make integration-test
```

# Roadmap

This program aims to create the following features:

- Given a list of websites stored in a MongoDB collection, scrape the content of each website and save it in another MongoDB collection. ✅
- After the scraping, calculate the similarity between the new content and the previous content of each website, and update the MongoDB collection
with this information. ✅
- Every registered user will receive an email, based on the URLs they have registered, notifying them about news on their favorite engineers' websites. ✅
- Create API routes to register new users and the websites that they want to follow. ⌛

Note: All these flows will be triggered by a cron job. ✅
Binary file added assets/newsletterarq.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ module github.com/perebaj/newsletter

go 1.21.5

require go.mongodb.org/mongo-driver v1.13.1

require (
github.com/golang/snappy v0.0.1 // indirect
github.com/klauspost/compress v1.13.6 // indirect
Expand All @@ -10,7 +12,6 @@ require (
github.com/xdg-go/scram v1.1.2 // indirect
github.com/xdg-go/stringprep v1.0.4 // indirect
github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect
go.mongodb.org/mongo-driver v1.13.1 // indirect
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d // indirect
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 // indirect
golang.org/x/text v0.7.0 // indirect
Expand Down
3 changes: 3 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.2 h1:X2ev0eStA3AbceY54o37/0PQ/UWqKEiiO2dKL5OPaFM=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc=
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
Expand Down Expand Up @@ -48,4 +50,5 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
20 changes: 20 additions & 0 deletions mail_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package newsletter

import (
"context"
"testing"
)

// MailClientMockImpl is a no-op mail client used by the tests in this package.
type MailClientMockImpl struct{}

// Send ignores both of its arguments and always reports success by returning
// a nil error.
func (MailClientMockImpl) Send(_ []string, _ string) error {
	return nil
}

// TestEmailTrigger verifies that EmailTrigger returns a nil error when it is
// driven by the storage mock and the mail client mock.
func TestEmailTrigger(t *testing.T) {
	storage := NewStorageMock()
	mailer := MailClientMockImpl{}

	if got := EmailTrigger(context.Background(), storage, mailer); got != nil {
		t.Errorf("expected nil, got %v", got)
	}
}
File renamed without changes.
61 changes: 23 additions & 38 deletions scrape_test.go → scraper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,39 @@ import (
"github.com/perebaj/newsletter/mongodb"
)

const fakeURL = "http://fakeurl.test"
// StorageMockImpl is a storage stub that returns fixed, in-memory data.
type StorageMockImpl struct{}

// FakeURL is the placeholder address used by the storage mock and the tests.
const FakeURL = "http://fakeurl.test"

// NewStorageMock returns a ready-to-use StorageMockImpl.
func NewStorageMock() StorageMockImpl {
	return StorageMockImpl{}
}

// SavePage ignores the given pages and always returns a nil error.
func (StorageMockImpl) SavePage(_ context.Context, _ []mongodb.Page) error {
	return nil
}

// DistinctEngineerURLs returns a single-element slice holding FakeURL.
func (StorageMockImpl) DistinctEngineerURLs(_ context.Context) ([]interface{}, error) {
	urls := []interface{}{FakeURL}
	return urls, nil
}

// Page returns an empty, non-nil slice of pages regardless of its arguments.
func (StorageMockImpl) Page(_ context.Context, _ string) ([]mongodb.Page, error) {
	return make([]mongodb.Page, 0), nil
}

// Newsletter returns one newsletter entry whose only URL is FakeURL.
func (StorageMockImpl) Newsletter() ([]mongodb.Newsletter, error) {
	entry := mongodb.Newsletter{URLs: []string{FakeURL}}
	return []mongodb.Newsletter{entry}, nil
}

// PageIn returns two pages for FakeURL, both flagged as most recent, each
// carrying a different content string and the MD5 sum of that content.
func (StorageMockImpl) PageIn(_ context.Context, _ []string) ([]mongodb.Page, error) {
	contents := []string{"Hello, World!", "Hello, World! 2"}

	pages := make([]mongodb.Page, 0, len(contents))
	for _, body := range contents {
		pages = append(pages, mongodb.Page{
			IsMostRecent: true,
			URL:          FakeURL,
			Content:      body,
			HashMD5:      md5.Sum([]byte(body)),
		})
	}
	return pages, nil
}

func TestPageComparation(t *testing.T) {
recentScrapedPage := Page{
Content: "Hello, World!",
URL: fakeURL,
URL: FakeURL,
ScrapeDateTime: time.Now().UTC(),
}

lastScrapedPage := []mongodb.Page{
{
Content: "Hello, World!",
URL: fakeURL,
URL: FakeURL,
ScrapeDatetime: time.Now().UTC().Add(-time.Duration(1) * time.Hour),
HashMD5: md5.Sum([]byte("Hello, World!")),
},
Expand Down Expand Up @@ -115,38 +135,3 @@ func TestFetch_Status500(t *testing.T) {
t.Errorf("expected empty body, got %s", got)
}
}

// TestEmailTrigger verifies that EmailTrigger returns a nil error when it is
// driven by the storage mock and the mail client mock.
func TestEmailTrigger(t *testing.T) {
	storage := NewStorageMock()
	mailer := MailClientMockImpl{}

	if got := EmailTrigger(context.Background(), storage, mailer); got != nil {
		t.Errorf("expected nil, got %v", got)
	}
}

// MailClientMockImpl is a no-op mail client used by the tests in this package.
type MailClientMockImpl struct{}

// Send ignores both of its arguments and always reports success by returning
// a nil error.
func (MailClientMockImpl) Send(_ []string, _ string) error {
	return nil
}

// StorageMockImpl is a storage stub that returns fixed, in-memory data.
type StorageMockImpl struct{}

// NewStorageMock returns a ready-to-use StorageMockImpl.
func NewStorageMock() StorageMockImpl {
	return StorageMockImpl{}
}

// SavePage ignores the given pages and always returns a nil error.
func (StorageMockImpl) SavePage(_ context.Context, _ []mongodb.Page) error {
	return nil
}

// DistinctEngineerURLs returns a single-element slice holding fakeURL.
func (StorageMockImpl) DistinctEngineerURLs(_ context.Context) ([]interface{}, error) {
	urls := []interface{}{fakeURL}
	return urls, nil
}

// Page returns an empty, non-nil slice of pages regardless of its arguments.
func (StorageMockImpl) Page(_ context.Context, _ string) ([]mongodb.Page, error) {
	return make([]mongodb.Page, 0), nil
}

// Newsletter returns one newsletter entry whose only URL is fakeURL.
func (StorageMockImpl) Newsletter() ([]mongodb.Newsletter, error) {
	entry := mongodb.Newsletter{URLs: []string{fakeURL}}
	return []mongodb.Newsletter{entry}, nil
}

// PageIn returns two pages for fakeURL, both flagged as most recent, each
// carrying a different content string and the MD5 sum of that content.
func (StorageMockImpl) PageIn(_ context.Context, _ []string) ([]mongodb.Page, error) {
	contents := []string{"Hello, World!", "Hello, World! 2"}

	pages := make([]mongodb.Page, 0, len(contents))
	for _, body := range contents {
		pages = append(pages, mongodb.Page{
			IsMostRecent: true,
			URL:          fakeURL,
			Content:      body,
			HashMD5:      md5.Sum([]byte(body)),
		})
	}
	return pages, nil
}