Skip to content

Commit

Permalink
Use Odysee video duration as read time
Browse files Browse the repository at this point in the history
This feature works by scraping the Odysee website.

To enable it, set the FETCH_ODYSEE_WATCH_TIME environment variable to
1.
  • Loading branch information
kmein authored and fguillot committed Aug 10, 2023
1 parent 859b446 commit 3060946
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 0 deletions.
18 changes: 18 additions & 0 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1598,6 +1598,24 @@ func TestAuthProxyUserCreationAdmin(t *testing.T) {
}
}

// TestFetchOdyseeWatchTime verifies that setting FETCH_ODYSEE_WATCH_TIME=1
// in the environment turns the Odysee watch-time option on.
func TestFetchOdyseeWatchTime(t *testing.T) {
	// Start from an empty environment so only the variable under test is set.
	os.Clearenv()
	os.Setenv("FETCH_ODYSEE_WATCH_TIME", "1")

	opts, err := NewParser().ParseEnvironmentVariables()
	if err != nil {
		t.Fatalf(`Parsing failure: %v`, err)
	}

	if got, want := opts.FetchOdyseeWatchTime(), true; got != want {
		t.Fatalf(`Unexpected FETCH_ODYSEE_WATCH_TIME value, got %v instead of %v`, got, want)
	}
}

func TestFetchYouTubeWatchTime(t *testing.T) {
os.Clearenv()
os.Setenv("FETCH_YOUTUBE_WATCH_TIME", "1")
Expand Down
10 changes: 10 additions & 0 deletions config/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ const (
defaultProxyOption = "http-only"
defaultProxyMediaTypes = "image"
defaultProxyUrl = ""
defaultFetchOdyseeWatchTime = false
defaultFetchYouTubeWatchTime = false
defaultYouTubeEmbedUrlOverride = "https://www.youtube-nocookie.com/embed/"
defaultCreateAdmin = false
Expand Down Expand Up @@ -126,6 +127,7 @@ type Options struct {
proxyOption string
proxyMediaTypes []string
proxyUrl string
fetchOdyseeWatchTime bool
fetchYouTubeWatchTime bool
youTubeEmbedUrlOverride string
oauth2UserCreationAllowed bool
Expand Down Expand Up @@ -196,6 +198,7 @@ func NewOptions() *Options {
proxyOption: defaultProxyOption,
proxyMediaTypes: []string{defaultProxyMediaTypes},
proxyUrl: defaultProxyUrl,
fetchOdyseeWatchTime: defaultFetchOdyseeWatchTime,
fetchYouTubeWatchTime: defaultFetchYouTubeWatchTime,
youTubeEmbedUrlOverride: defaultYouTubeEmbedUrlOverride,
oauth2UserCreationAllowed: defaultOAuth2UserCreation,
Expand Down Expand Up @@ -436,6 +439,12 @@ func (o *Options) YouTubeEmbedUrlOverride() string {
return o.youTubeEmbedUrlOverride
}

// FetchOdyseeWatchTime reports whether the Odysee video duration
// should be fetched and used as the entry reading time.
//
// The option is disabled by default and is controlled by the
// FETCH_ODYSEE_WATCH_TIME configuration variable.
func (o *Options) FetchOdyseeWatchTime() bool {
return o.fetchOdyseeWatchTime
}

// ProxyOption returns "none" to never proxy, "http-only" to proxy non-HTTPS, "all" to always proxy.
func (o *Options) ProxyOption() string {
return o.proxyOption
Expand Down Expand Up @@ -581,6 +590,7 @@ func (o *Options) SortedOptions(redactSecret bool) []*Option {
"DISABLE_HTTP_SERVICE": !o.httpService,
"DISABLE_SCHEDULER_SERVICE": !o.schedulerService,
"FETCH_YOUTUBE_WATCH_TIME": o.fetchYouTubeWatchTime,
"FETCH_ODYSEE_WATCH_TIME": o.fetchOdyseeWatchTime,
"HTTPS": o.HTTPS,
"HTTP_CLIENT_MAX_BODY_SIZE": o.httpClientMaxBodySize,
"HTTP_CLIENT_PROXY": o.httpClientProxy,
Expand Down
2 changes: 2 additions & 0 deletions config/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,8 @@ func (p *Parser) parseLines(lines []string) (err error) {
p.opts.metricsPassword = parseString(value, defaultMetricsPassword)
case "METRICS_PASSWORD_FILE":
p.opts.metricsPassword = readSecretFile(value, defaultMetricsPassword)
case "FETCH_ODYSEE_WATCH_TIME":
p.opts.fetchOdyseeWatchTime = parseBool(value, defaultFetchOdyseeWatchTime)
case "FETCH_YOUTUBE_WATCH_TIME":
p.opts.fetchYouTubeWatchTime = parseBool(value, defaultFetchYouTubeWatchTime)
case "YOUTUBE_EMBED_URL_OVERRIDE":
Expand Down
6 changes: 6 additions & 0 deletions miniflux.1
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ Set the value to 1 to enable debug logs\&.
.br
Disabled by default\&.
.TP
.B FETCH_ODYSEE_WATCH_TIME
Set the value to 1 to scrape the video duration from the Odysee website and
use it as the reading time\&.
.br
Disabled by default\&.
.TP
.B FETCH_YOUTUBE_WATCH_TIME
Set the value to 1 to scrape the video duration from the YouTube website and
use it as the reading time\&.
Expand Down
46 changes: 46 additions & 0 deletions reader/processor/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (

// Package-level regular expressions, compiled once when the package is
// initialized. regexp.MustCompile panics if a pattern is invalid.
var (
// youtubeRegex matches text containing "youtube.com/watch?v=" and captures
// everything that follows "v=" in its first group.
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
// odyseeRegex matches strings that begin with "https://odysee.com".
// NOTE(review): nothing anchors the end of the host name, so a URL such as
// "https://odysee.community/..." also matches — confirm this is acceptable.
odyseeRegex = regexp.MustCompile(`^https://odysee\.com`)
// iso8601Regex matches a complete ISO 8601 duration string (for example
// "PT1H2M3S") and exposes its components through the named groups
// year, month, week, day, hour, minute and second.
iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
// customReplaceRuleRegex matches text of the form rewrite("<a>"|"<b>") and
// captures the two quoted parts in groups 1 and 2.
customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`)
)
Expand Down Expand Up @@ -207,6 +208,17 @@ func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *mod
}
}

if shouldFetchOdyseeWatchTime(entry) {
if entryIsNew {
watchTime, err := fetchOdyseeWatchTime(entry.URL)
if err != nil {
logger.Error("[Processor] Unable to fetch Odysee watch time: %q => %v", entry.URL, err)
}
entry.ReadingTime = watchTime
} else {
entry.ReadingTime = store.GetReadTime(entry, feed)
}
}
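// Fall back to a content-based estimate when no watch time was obtained
// (YouTube/Odysee fetch error, or the entry is not a YouTube/Odysee video).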
if entry.ReadingTime == 0 {
entry.ReadingTime = calculateReadingTime(entry.Content, user)
Expand All @@ -222,6 +234,14 @@ func shouldFetchYouTubeWatchTime(entry *model.Entry) bool {
return urlMatchesYouTubePattern
}

// shouldFetchOdyseeWatchTime reports whether the watch time of the given
// entry should be fetched from Odysee: the feature must be enabled in the
// configuration and the entry URL must match odyseeRegex.
func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
	// The URL is only inspected when the option is turned on.
	return config.Opts.FetchOdyseeWatchTime() && odyseeRegex.MatchString(entry.URL)
}

func fetchYouTubeWatchTime(url string) (int, error) {
clt := client.NewClientWithConfig(url, config.Opts)
response, browserErr := browser.Exec(clt)
Expand All @@ -247,6 +267,32 @@ func fetchYouTubeWatchTime(url string) (int, error) {
return int(dur.Minutes()), nil
}

// fetchOdyseeWatchTime requests the page at url and returns the video
// duration, in whole minutes, advertised by the first
// <meta property="og:video:duration"> element of the document.
//
// The attribute value is interpreted as a number of seconds and truncated to
// minutes, so a video shorter than one minute yields 0.
//
// An error is returned when the request fails, when the response cannot be
// parsed as HTML, when the meta element has no content attribute, or when
// the content is not a non-negative base-10 integer. The parse error is
// wrapped, so callers can inspect it with errors.Is / errors.As.
func fetchOdyseeWatchTime(url string) (int, error) {
	clt := client.NewClientWithConfig(url, config.Opts)
	response, browserErr := browser.Exec(clt)
	if browserErr != nil {
		return 0, browserErr
	}

	doc, docErr := goquery.NewDocumentFromReader(response.Body)
	if docErr != nil {
		return 0, docErr
	}

	// durs holds the video watch time in seconds, as text.
	durs, exists := doc.Find(`meta[property="og:video:duration"]`).First().Attr("content")
	if !exists {
		return 0, errors.New("duration not found")
	}

	seconds, err := strconv.ParseInt(durs, 10, 64)
	if err != nil {
		return 0, fmt.Errorf("unable to parse duration %q: %w", durs, err)
	}
	// A negative duration is meaningless and would otherwise be stored as a
	// negative reading time.
	if seconds < 0 {
		return 0, fmt.Errorf("invalid negative duration %q", durs)
	}

	return int(seconds / 60), nil
}

// parseISO8601 parses an ISO 8601 duration string.
func parseISO8601(from string) (time.Duration, error) {
var match []string
Expand Down

0 comments on commit 3060946

Please sign in to comment.