Skip to content

Commit

Permalink
feat: add FETCH_BILIBILI_WATCH_TIME config option
Browse files Browse the repository at this point in the history
  • Loading branch information
Qeynos authored Aug 2, 2024
1 parent 569529d commit bcbf9f4
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 0 deletions.
18 changes: 18 additions & 0 deletions internal/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2044,6 +2044,24 @@ func TestAuthProxyUserCreationAdmin(t *testing.T) {
}
}

// TestFetchBilibiliWatchTime verifies that setting FETCH_BILIBILI_WATCH_TIME=1
// in the environment turns the corresponding option on.
func TestFetchBilibiliWatchTime(t *testing.T) {
	// Start from an empty environment so only the option under test is set.
	os.Clearenv()
	os.Setenv("FETCH_BILIBILI_WATCH_TIME", "1")

	opts, err := NewParser().ParseEnvironmentVariables()
	if err != nil {
		t.Fatalf(`Parsing failure: %v`, err)
	}

	if got, want := opts.FetchBilibiliWatchTime(), true; got != want {
		t.Fatalf(`Unexpected FETCH_BILIBILI_WATCH_TIME value, got %v instead of %v`, got, want)
	}
}

func TestFetchNebulaWatchTime(t *testing.T) {
os.Clearenv()
os.Setenv("FETCH_NEBULA_WATCH_TIME", "1")
Expand Down
10 changes: 10 additions & 0 deletions internal/config/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ const (
defaultMediaResourceTypes = "image"
defaultMediaProxyURL = ""
defaultFilterEntryMaxAgeDays = 0
defaultFetchBilibiliWatchTime = false
defaultFetchNebulaWatchTime = false
defaultFetchOdyseeWatchTime = false
defaultFetchYouTubeWatchTime = false
Expand Down Expand Up @@ -141,6 +142,7 @@ type Options struct {
mediaProxyMode string
mediaProxyResourceTypes []string
mediaProxyCustomURL string
fetchBilibiliWatchTime bool
fetchNebulaWatchTime bool
fetchOdyseeWatchTime bool
fetchYouTubeWatchTime bool
Expand Down Expand Up @@ -218,6 +220,7 @@ func NewOptions() *Options {
mediaProxyResourceTypes: []string{defaultMediaResourceTypes},
mediaProxyCustomURL: defaultMediaProxyURL,
filterEntryMaxAgeDays: defaultFilterEntryMaxAgeDays,
fetchBilibiliWatchTime: defaultFetchBilibiliWatchTime,
fetchNebulaWatchTime: defaultFetchNebulaWatchTime,
fetchOdyseeWatchTime: defaultFetchOdyseeWatchTime,
fetchYouTubeWatchTime: defaultFetchYouTubeWatchTime,
Expand Down Expand Up @@ -501,6 +504,12 @@ func (o *Options) FetchOdyseeWatchTime() bool {
return o.fetchOdyseeWatchTime
}

// FetchBilibiliWatchTime returns true if the Bilibili video duration
// should be fetched and used as a reading time.
//
// The value comes from the FETCH_BILIBILI_WATCH_TIME configuration option
// and is false unless that option enables it.
func (o *Options) FetchBilibiliWatchTime() bool {
return o.fetchBilibiliWatchTime
}

// MediaProxyMode returns "none" to never proxy, "http-only" to proxy non-HTTPS, "all" to always proxy.
func (o *Options) MediaProxyMode() string {
return o.mediaProxyMode
Expand Down Expand Up @@ -658,6 +667,7 @@ func (o *Options) SortedOptions(redactSecret bool) []*Option {
"FETCH_YOUTUBE_WATCH_TIME": o.fetchYouTubeWatchTime,
"FETCH_NEBULA_WATCH_TIME": o.fetchNebulaWatchTime,
"FETCH_ODYSEE_WATCH_TIME": o.fetchOdyseeWatchTime,
"FETCH_BILIBILI_WATCH_TIME": o.fetchBilibiliWatchTime,
"HTTPS": o.HTTPS,
"HTTP_CLIENT_MAX_BODY_SIZE": o.httpClientMaxBodySize,
"HTTP_CLIENT_PROXY": o.httpClientProxy,
Expand Down
2 changes: 2 additions & 0 deletions internal/config/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,8 @@ func (p *Parser) parseLines(lines []string) (err error) {
p.opts.metricsPassword = parseString(value, defaultMetricsPassword)
case "METRICS_PASSWORD_FILE":
p.opts.metricsPassword = readSecretFile(value, defaultMetricsPassword)
case "FETCH_BILIBILI_WATCH_TIME":
p.opts.fetchBilibiliWatchTime = parseBool(value, defaultFetchBilibiliWatchTime)
case "FETCH_NEBULA_WATCH_TIME":
p.opts.fetchNebulaWatchTime = parseBool(value, defaultFetchNebulaWatchTime)
case "FETCH_ODYSEE_WATCH_TIME":
Expand Down
67 changes: 67 additions & 0 deletions internal/reader/processor/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ var (
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
nebulaRegex = regexp.MustCompile(`^https://nebula\.tv`)
odyseeRegex = regexp.MustCompile(`^https://odysee\.com`)
bilibiliRegex = regexp.MustCompile(`bilibili\.com/video/(.*)$`)
timelengthRegex = regexp.MustCompile(`"timelength":\s*(\d+)`)
iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`)
)
Expand Down Expand Up @@ -418,6 +420,25 @@ func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *mod
}
}

if shouldFetchBilibiliWatchTime(entry) {
if entryIsNew {
watchTime, err := fetchBilibiliWatchTime(entry.URL)
if err != nil {
slog.Warn("Unable to fetch Bilibili watch time",
slog.Int64("user_id", user.ID),
slog.Int64("entry_id", entry.ID),
slog.String("entry_url", entry.URL),
slog.Int64("feed_id", feed.ID),
slog.String("feed_url", feed.FeedURL),
slog.Any("error", err),
)
}
entry.ReadingTime = watchTime
} else {
entry.ReadingTime = store.GetReadTime(feed.ID, entry.Hash)
}
}

// Fall back to an estimated reading time when no watch time was set (fetch error or non-video entry).
if entry.ReadingTime == 0 {
entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
Expand Down Expand Up @@ -449,6 +470,15 @@ func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
return matches != nil
}

// shouldFetchBilibiliWatchTime reports whether the watch time of the given
// entry should be looked up on Bilibili: the FETCH_BILIBILI_WATCH_TIME option
// must be enabled and the entry URL must match the Bilibili video pattern.
func shouldFetchBilibiliWatchTime(entry *model.Entry) bool {
	if config.Opts.FetchBilibiliWatchTime() {
		// A successful match yields the full match plus one capture group.
		return len(bilibiliRegex.FindStringSubmatch(entry.URL)) == 2
	}
	return false
}

func fetchYouTubeWatchTime(websiteURL string) (int, error) {
requestBuilder := fetcher.NewRequestBuilder()
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
Expand Down Expand Up @@ -544,6 +574,43 @@ func fetchOdyseeWatchTime(websiteURL string) (int, error) {
return int(dur / 60), nil
}

// fetchBilibiliWatchTime downloads the page at websiteURL and extracts the
// video duration from the first `"timelength": <digits>` occurrence found in
// the document text.
//
// The extracted number is treated as milliseconds: it is truncated to whole
// seconds and then rounded up to whole minutes.
//
// It returns the duration in minutes, or 0 and an error when the page cannot
// be fetched, the document cannot be parsed, no timelength value is present,
// or the value does not fit in an int64.
func fetchBilibiliWatchTime(websiteURL string) (int, error) {
	requestBuilder := fetcher.NewRequestBuilder()
	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())

	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
	defer responseHandler.Close()

	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
		slog.Warn("Unable to fetch Bilibili page", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
		return 0, localizedError.Error()
	}

	// The body is read with the configured maximum size.
	doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
	if docErr != nil {
		return 0, docErr
	}

	// Index 0 is the whole match, index 1 is the captured digit sequence.
	timelengthMatches := timelengthRegex.FindStringSubmatch(doc.Text())
	if len(timelengthMatches) < 2 {
		return 0, errors.New("duration has not found")
	}

	durationMs, err := strconv.ParseInt(timelengthMatches[1], 10, 64)
	if err != nil {
		// Wrap with %w so callers can inspect the underlying strconv error.
		return 0, fmt.Errorf("unable to parse duration %s: %w", timelengthMatches[1], err)
	}

	// Drop the sub-second part, then round any partial minute up.
	durationSec := durationMs / 1000
	durationMin := durationSec / 60
	if durationSec%60 != 0 {
		durationMin++
	}

	return int(durationMin), nil
}

// parseISO8601 parses an ISO 8601 duration string.
func parseISO8601(from string) (time.Duration, error) {
var match []string
Expand Down
6 changes: 6 additions & 0 deletions miniflux.1
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,12 @@ Set the value to 1 to disable the internal scheduler service\&.
.br
Default is false (The internal scheduler service is enabled)\&.
.TP
.B FETCH_BILIBILI_WATCH_TIME
Set the value to 1 to scrape the video duration from the Bilibili website and
use it as the reading time\&.
.br
Disabled by default\&.
.TP
.B FETCH_NEBULA_WATCH_TIME
Set the value to 1 to scrape video duration from Nebula website and
use it as a reading time\&.
Expand Down

0 comments on commit bcbf9f4

Please sign in to comment.