diff --git a/internal/config/config_test.go b/internal/config/config_test.go
index 3b979c5de8c..309d0464368 100644
--- a/internal/config/config_test.go
+++ b/internal/config/config_test.go
@@ -2044,6 +2044,24 @@ func TestAuthProxyUserCreationAdmin(t *testing.T) {
 	}
 }
 
+func TestFetchBilibiliWatchTime(t *testing.T) {
+	os.Clearenv()
+	os.Setenv("FETCH_BILIBILI_WATCH_TIME", "1")
+
+	parser := NewParser()
+	opts, err := parser.ParseEnvironmentVariables()
+	if err != nil {
+		t.Fatalf(`Parsing failure: %v`, err)
+	}
+
+	expected := true
+	result := opts.FetchBilibiliWatchTime()
+
+	if result != expected {
+		t.Fatalf(`Unexpected FETCH_BILIBILI_WATCH_TIME value, got %v instead of %v`, result, expected)
+	}
+}
+
 func TestFetchNebulaWatchTime(t *testing.T) {
 	os.Clearenv()
 	os.Setenv("FETCH_NEBULA_WATCH_TIME", "1")
diff --git a/internal/config/options.go b/internal/config/options.go
index d5d793ac0e4..d2536070b6d 100644
--- a/internal/config/options.go
+++ b/internal/config/options.go
@@ -56,6 +56,7 @@ const (
 	defaultMediaResourceTypes = "image"
 	defaultMediaProxyURL = ""
 	defaultFilterEntryMaxAgeDays = 0
+	defaultFetchBilibiliWatchTime = false
 	defaultFetchNebulaWatchTime = false
 	defaultFetchOdyseeWatchTime = false
 	defaultFetchYouTubeWatchTime = false
@@ -141,6 +142,7 @@ type Options struct {
 	mediaProxyMode string
 	mediaProxyResourceTypes []string
 	mediaProxyCustomURL string
+	fetchBilibiliWatchTime bool
 	fetchNebulaWatchTime bool
 	fetchOdyseeWatchTime bool
 	fetchYouTubeWatchTime bool
@@ -218,6 +220,7 @@ func NewOptions() *Options {
 		mediaProxyResourceTypes: []string{defaultMediaResourceTypes},
 		mediaProxyCustomURL: defaultMediaProxyURL,
 		filterEntryMaxAgeDays: defaultFilterEntryMaxAgeDays,
+		fetchBilibiliWatchTime: defaultFetchBilibiliWatchTime,
 		fetchNebulaWatchTime: defaultFetchNebulaWatchTime,
 		fetchOdyseeWatchTime: defaultFetchOdyseeWatchTime,
 		fetchYouTubeWatchTime: defaultFetchYouTubeWatchTime,
@@ -501,6 +504,12 @@ func (o *Options) FetchOdyseeWatchTime() bool {
 	return o.fetchOdyseeWatchTime
 }
 
+// FetchBilibiliWatchTime returns true if the Bilibili video duration
+// should be fetched and used as a reading time.
+func (o *Options) FetchBilibiliWatchTime() bool {
+	return o.fetchBilibiliWatchTime
+}
+
 // MediaProxyMode returns "none" to never proxy, "http-only" to proxy non-HTTPS, "all" to always proxy.
 func (o *Options) MediaProxyMode() string {
 	return o.mediaProxyMode
@@ -658,6 +667,7 @@ func (o *Options) SortedOptions(redactSecret bool) []*Option {
 		"FETCH_YOUTUBE_WATCH_TIME": o.fetchYouTubeWatchTime,
 		"FETCH_NEBULA_WATCH_TIME": o.fetchNebulaWatchTime,
 		"FETCH_ODYSEE_WATCH_TIME": o.fetchOdyseeWatchTime,
+		"FETCH_BILIBILI_WATCH_TIME": o.fetchBilibiliWatchTime,
 		"HTTPS": o.HTTPS,
 		"HTTP_CLIENT_MAX_BODY_SIZE": o.httpClientMaxBodySize,
 		"HTTP_CLIENT_PROXY": o.httpClientProxy,
diff --git a/internal/config/parser.go b/internal/config/parser.go
index f7e58aaa3a2..9ea4053cb43 100644
--- a/internal/config/parser.go
+++ b/internal/config/parser.go
@@ -259,6 +259,8 @@ func (p *Parser) parseLines(lines []string) (err error) {
 			p.opts.metricsPassword = parseString(value, defaultMetricsPassword)
 		case "METRICS_PASSWORD_FILE":
 			p.opts.metricsPassword = readSecretFile(value, defaultMetricsPassword)
+		case "FETCH_BILIBILI_WATCH_TIME":
+			p.opts.fetchBilibiliWatchTime = parseBool(value, defaultFetchBilibiliWatchTime)
 		case "FETCH_NEBULA_WATCH_TIME":
 			p.opts.fetchNebulaWatchTime = parseBool(value, defaultFetchNebulaWatchTime)
 		case "FETCH_ODYSEE_WATCH_TIME":
diff --git a/internal/reader/processor/processor.go b/internal/reader/processor/processor.go
index 7a5bdd33d70..74750d76039 100644
--- a/internal/reader/processor/processor.go
+++ b/internal/reader/processor/processor.go
@@ -33,6 +33,8 @@ var (
 	youtubeRegex           = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
 	nebulaRegex            = regexp.MustCompile(`^https://nebula\.tv`)
 	odyseeRegex            = regexp.MustCompile(`^https://odysee\.com`)
+	bilibiliRegex          = regexp.MustCompile(`bilibili\.com/video/(.*)$`)
+	timelengthRegex        = regexp.MustCompile(`"timelength":\s*(\d+)`)
 	iso8601Regex           = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
 	customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`)
 )
@@ -418,6 +420,25 @@ func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *mod
 		}
 	}
 
+	if shouldFetchBilibiliWatchTime(entry) {
+		if entryIsNew {
+			watchTime, err := fetchBilibiliWatchTime(entry.URL)
+			if err != nil {
+				slog.Warn("Unable to fetch Bilibili watch time",
+					slog.Int64("user_id", user.ID),
+					slog.Int64("entry_id", entry.ID),
+					slog.String("entry_url", entry.URL),
+					slog.Int64("feed_id", feed.ID),
+					slog.String("feed_url", feed.FeedURL),
+					slog.Any("error", err),
+				)
+			}
+			entry.ReadingTime = watchTime
+		} else {
+			entry.ReadingTime = store.GetReadTime(feed.ID, entry.Hash)
+		}
+	}
+
 	// Handle YT error case and non-YT entries.
 	if entry.ReadingTime == 0 {
 		entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
@@ -449,6 +470,17 @@ func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
 	return matches != nil
 }
 
+// shouldFetchBilibiliWatchTime reports whether the FETCH_BILIBILI_WATCH_TIME
+// option is enabled and the entry URL matches the Bilibili video URL pattern.
+func shouldFetchBilibiliWatchTime(entry *model.Entry) bool {
+	if !config.Opts.FetchBilibiliWatchTime() {
+		return false
+	}
+	matches := bilibiliRegex.FindStringSubmatch(entry.URL)
+	urlMatchesBilibiliPattern := len(matches) == 2
+	return urlMatchesBilibiliPattern
+}
+
 func fetchYouTubeWatchTime(websiteURL string) (int, error) {
 	requestBuilder := fetcher.NewRequestBuilder()
 	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
@@ -544,6 +576,47 @@ func fetchOdyseeWatchTime(websiteURL string) (int, error) {
 	return int(dur / 60), nil
 }
 
+// fetchBilibiliWatchTime downloads the Bilibili video page and returns the
+// video duration in minutes, rounded up. The duration is read from the
+// "timelength" value embedded in the page, which is treated as milliseconds.
+func fetchBilibiliWatchTime(websiteURL string) (int, error) {
+	requestBuilder := fetcher.NewRequestBuilder()
+	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
+	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
+
+	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
+	defer responseHandler.Close()
+
+	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
+		slog.Warn("Unable to fetch Bilibili page", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
+		return 0, localizedError.Error()
+	}
+
+	doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
+	if docErr != nil {
+		return 0, docErr
+	}
+
+	timelengthMatches := timelengthRegex.FindStringSubmatch(doc.Text())
+	if len(timelengthMatches) < 2 {
+		return 0, errors.New("duration not found")
+	}
+
+	durationMs, err := strconv.ParseInt(timelengthMatches[1], 10, 64)
+	if err != nil {
+		return 0, fmt.Errorf("unable to parse duration %s: %w", timelengthMatches[1], err)
+	}
+
+	durationSec := durationMs / 1000
+	durationMin := durationSec / 60
+	// Count a partial minute as a full one.
+	if durationSec%60 != 0 {
+		durationMin++
+	}
+
+	return int(durationMin), nil
+}
+
 // parseISO8601 parses an ISO 8601 duration string.
 func parseISO8601(from string) (time.Duration, error) {
 	var match []string
diff --git a/miniflux.1 b/miniflux.1
index d0879f0982c..4edd0ff740c 100644
--- a/miniflux.1
+++ b/miniflux.1
@@ -244,6 +244,12 @@ Set the value to 1 to disable the internal scheduler service\&.
 .br
 Default is false (The internal scheduler service is enabled)\&.
 .TP
+.B FETCH_BILIBILI_WATCH_TIME
+Set the value to 1 to scrape video duration from Bilibili website and
+use it as a reading time\&.
+.br
+Disabled by default\&.
+.TP
 .B FETCH_NEBULA_WATCH_TIME
 Set the value to 1 to scrape video duration from Nebula website and
 use it as a reading time\&.