Skip to content

Commit

Permalink
smr: support to summarize Twitter (#172)
Browse files Browse the repository at this point in the history
  • Loading branch information
nekomeowww authored Aug 16, 2023
1 parent edfd70d commit adedee6
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 12 deletions.
36 changes: 27 additions & 9 deletions internal/models/smr/smr.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/nekomeowww/insights-bot/internal/datastore"
"github.com/nekomeowww/insights-bot/internal/thirdparty/openai"
"github.com/nekomeowww/insights-bot/pkg/bots/tgbot"
"github.com/nekomeowww/insights-bot/pkg/linkprev"
"github.com/nekomeowww/insights-bot/pkg/logger"
"github.com/nekomeowww/insights-bot/pkg/types/bot"
)
Expand All @@ -37,11 +38,12 @@ type NewModelParams struct {
}

type Model struct {
config *configs.Config
openai openai.Client
logger *logger.Logger
req *req.Client
ent *datastore.Ent
config *configs.Config
openai openai.Client
logger *logger.Logger
req *req.Client
linkprev *linkprev.Client
ent *datastore.Ent
}

func NewModel() func(NewModelParams) *Model {
Expand All @@ -50,10 +52,12 @@ func NewModel() func(NewModelParams) *Model {
config: param.Config,
req: req.
C().
SetUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54"),
logger: param.Logger,
openai: param.OpenAIClient,
ent: param.Ent,
SetUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54").
EnableDumpEachRequest(),
linkprev: linkprev.NewClient(),
logger: param.Logger,
openai: param.OpenAIClient,
ent: param.Ent,
}
}
}
Expand Down Expand Up @@ -149,6 +153,20 @@ func (m *Model) extractContentFromURL(ctx context.Context, urlString string) (*r
return nil, errors.New("empty url")
}

// URLs whose host is twitter.com, vxtwitter.com or fxtwitter.com are handled
// here and return before the generic m.req fetch below: the article is built
// from the link preview metadata returned by m.linkprev.Preview.
// NOTE(review): this is an exact match against parsedURL.Host, so hosts such
// as "www.twitter.com" or "twitter.com:443" do not take this branch —
// confirm that is intended.
if lo.Contains([]string{"twitter.com", "vxtwitter.com", "fxtwitter.com"}, parsedURL.Host) {
meta, err := m.linkprev.Preview(ctx, urlString)
if err != nil {
// ErrNetworkError is wrapped with %w so callers can match it via errors.Is;
// the underlying error is formatted with %v and is therefore text only.
return nil, fmt.Errorf("failed to get url %s, %w: %v", parsedURL.String(), ErrNetworkError, err)
}

// The title is the fixed string "Tweet", the Open Graph title is used as the
// byline, and the Open Graph description fills both Content and TextContent.
return &readability.Article{
Title: "Tweet",
Byline: meta.OpenGraph.Title,
Content: meta.OpenGraph.Description,
TextContent: meta.OpenGraph.Description,
}, nil
}

resp, err := m.req.
R().
EnableDump().
Expand Down
46 changes: 45 additions & 1 deletion internal/models/smr/smr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func TestExtractContentFromURL(t *testing.T) {
require.Nil(article)

assert.ErrorIs(err, ErrNetworkError)
assert.True(strings.Contains(err.Error(), `no such host`))
assert.Contains(err.Error(), `no such host`)
})

t.Run("WeChatOfficialAccount", func(t *testing.T) {
Expand All @@ -55,6 +55,50 @@ func TestExtractContentFromURL(t *testing.T) {
assert.NotEmpty(article.Title)
assert.NotEmpty(article.TextContent)
})

// Twitter covers twitter.com and the vxtwitter.com / fxtwitter.com hosts: the
// same status path is requested on each host and every article field is
// expected to be populated.
t.Run("Twitter", func(t *testing.T) {
	hosts := []string{"twitter.com", "vxtwitter.com", "fxtwitter.com"}

	for _, host := range hosts {
		// Declared inside the loop body so each subtest closure captures its own copy.
		tweetURL := "https://" + host + "/GoogleDevEurope/status/1640667303158198272"

		// Each subtest is named after the host it exercises.
		t.Run(host, func(t *testing.T) {
			article, err := model.extractContentFromURL(context.Background(), tweetURL)
			require.NoError(t, err)
			require.NotNil(t, article)

			assert.NotEmpty(t, article.Title)
			assert.NotEmpty(t, article.Byline)
			assert.NotEmpty(t, article.Content)
			assert.NotEmpty(t, article.TextContent)
		})
	}
})
}

func TestContentTypeCheck(t *testing.T) {
Expand Down
5 changes: 3 additions & 2 deletions pkg/linkprev/linkprev.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ func NewClient() *Client {
return &Client{
reqClient: req.
C().
SetUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54"),
SetUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54").
EnableDumpEachRequest(),
}
}

Expand Down Expand Up @@ -68,7 +69,7 @@ func (c *Client) newRequest(ctx context.Context, urlStr string) *req.Request {

// requestForTwitter is a special request for Twitter.
//
// We need to ask Twitter server to generate a SSR rendered HTML for us to get the metadatas
// We need to ask the Twitter server to generate SSR-rendered HTML for us so that we can read the metadata.
// Learn more at:
// 1. https://stackoverflow.com/a/64332370/19954520
// 2. https://stackoverflow.com/a/64164115/19954520
Expand Down

0 comments on commit adedee6

Please sign in to comment.