From 6e3de0f956f611cc27a592fecd44317650591d15 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Thu, 25 Apr 2024 12:39:20 +0100 Subject: [PATCH 1/3] move find_download_url from document to query namespace --- lib/MetaCPAN/Document/File/Set.pm | 226 +---------------------------- lib/MetaCPAN/Query/Release.pm | 233 ++++++++++++++++++++++++++++++ 2 files changed, 234 insertions(+), 225 deletions(-) diff --git a/lib/MetaCPAN/Document/File/Set.pm b/lib/MetaCPAN/Document/File/Set.pm index abe2a62b9..c758497df 100644 --- a/lib/MetaCPAN/Document/File/Set.pm +++ b/lib/MetaCPAN/Document/File/Set.pm @@ -51,7 +51,7 @@ has query_release => ( isa => 'MetaCPAN::Query::Release', lazy => 1, builder => '_build_query_release', - handles => [qw< get_checksums >], + handles => [qw< find_download_url >], ); sub _build_query_release { @@ -203,230 +203,6 @@ sub documented_modules { ->source( [qw(name module path documentation distribution)] )->all; } -=head2 find_download_url - - -cpanm Foo -=> status: latest, maturity: released - -cpanm --dev Foo -=> status: -backpan, sort_by: version_numified,date - -cpanm Foo~1.0 -=> status: latest, maturity: released, module.version_numified: gte: 1.0 - -cpanm --dev Foo~1.0 --> status: -backpan, module.version_numified: gte: 1.0, sort_by: version_numified,date - -cpanm Foo~<2 -=> maturity: released, module.version_numified: lt: 2, sort_by: status,version_numified,date - -cpanm --dev Foo~<2 -=> status: -backpan, module.version_numified: lt: 2, sort_by: status,version_numified,date - - $file->find_download_url( 'Foo', { version => $version, dev => 0|1 }); - -Sorting: - - if it's stable: - prefer latest > cpan > backpan - then sort by version desc - then sort by date descending (rev chron) - - if it's dev: - sort by version desc - sort by date descending (reverse chronologically) - - -=cut - -sub find_download_url { - my ( $self, $module, $args ) = @_; - $args ||= {}; - - my $dev = $args->{dev}; - my $version = $args->{version}; - my $explicit_version = $version && $version =~ /==/; - - my @filters; - if ( !$explicit_version ) { - push @filters, { not => { term => { status => 'backpan' } } }; - if ( !$dev ) { - push @filters, { term => { maturity => 'released' } }; - } - } - - my $version_filters = $self->_version_filters($version); - - # filters to be applied to the nested modules - my $module_f = { - nested => { - path => 'module', - inner_hits => { _source => 'version' }, - filter => { - bool => { - must => [ - { term => { 'module.authorized' => 1 } }, - { term => { 'module.indexed' => 1 } }, - { term => { 'module.name' => $module } }, - ( - exists $version_filters->{must} - ? @{ $version_filters->{must} } - : () - ) - ], - ( - exists $version_filters->{must_not} - ? ( must_not => [ $version_filters->{must_not} ] ) - : () - ) - } - } - } - }; - - my $filter - = @filters - ? { bool => { must => [ @filters, $module_f ] } } - : $module_f; - - # sort by score, then version desc, then date desc - my @sort = ( - '_score', - { - 'module.version_numified' => { - mode => 'max', - order => 'desc', - nested_path => 'module', - nested_filter => $module_f->{nested}{filter} - } - }, - { date => { order => 'desc' } } - ); - - my $query; - - if ($dev) { - $query = { filtered => { filter => $filter } }; - } - else { - # if not dev, then prefer latest > cpan > backpan - $query = { - function_score => { - filter => $filter, - score_mode => 'first', - boost_mode => 'replace', - functions => [ - { - filter => { term => { status => 'latest' } }, - weight => 3 - }, - { - filter => { term => { status => 'cpan' } }, - weight => 2 - }, - { filter => { match_all => {} }, weight => 1 }, - ] - } - }; - } - - my $res - = $self->size(1)->query($query) - ->source( [ 'release', 'download_url', 'date', 'status' ] ) - ->search_type('dfs_query_then_fetch')->sort( \@sort )->raw->all; - return unless $res->{hits}{total}; - - my @checksums; - - my $hit = $res->{hits}{hits}[0]; - my $release = exists $hit->{_source} ? $hit->{_source}{release} : undef; - - if ($release) { - my $checksums = $self->get_checksums($release); - @checksums = ( - ( - $checksums->{checksum_md5} - ? ( checksum_md5 => $checksums->{checksum_md5} ) - : () - ), - ( - $checksums->{checksum_sha256} - ? ( checksum_sha256 => $checksums->{checksum_sha256} ) - : () - ), - ); - } - - return +{ - %{ $hit->{_source} }, - %{ $hit->{inner_hits}{module}{hits}{hits}[0]{_source} }, @checksums, - }; -} - -sub _version_filters { - my ( $self, $version ) = @_; - - return () unless $version; - - if ( $version =~ s/^==\s*// ) { - return +{ - must => [ { - term => { - 'module.version_numified' => $self->_numify($version) - } - } ] - }; - } - elsif ( $version =~ /^[<>!]=?\s*/ ) { - my %ops = qw(< lt <= lte > gt >= gte); - my ( %filters, %range, @exclusion ); - my @requirements = split /,\s*/, $version; - for my $r (@requirements) { - if ( $r =~ s/^([<>]=?)\s*// ) { - $range{ $ops{$1} } = $self->_numify($r); - } - elsif ( $r =~ s/\!=\s*// ) { - push @exclusion, $self->_numify($r); - } - } - - if ( keys %range ) { - $filters{must} - = [ { range => { 'module.version_numified' => \%range } } ]; - } - - if (@exclusion) { - $filters{must_not} = []; - push @{ $filters{must_not} }, map { - +{ - term => { - 'module.version_numified' => $self->_numify($_) - } - } - } @exclusion; - } - - return \%filters; - } - elsif ( $version !~ /\s/ ) { - return +{ - must => [ { - range => { - 'module.version_numified' => - { 'gte' => $self->_numify($version) } - }, - } ] - }; - } -} - -sub _numify { - my ( $self, $ver ) = @_; - $ver =~ s/_//g; - version->new($ver)->numify; -} - =head2 history Find the history of a given module/documentation. diff --git a/lib/MetaCPAN/Query/Release.pm b/lib/MetaCPAN/Query/Release.pm index ce8d51910..77b189e6a 100644 --- a/lib/MetaCPAN/Query/Release.pm +++ b/lib/MetaCPAN/Query/Release.pm @@ -992,5 +992,238 @@ sub modules { }; } +=head2 find_download_url + +cpanm Foo +=> status: latest, maturity: released + +cpanm --dev Foo +=> status: -backpan, sort_by: version_numified,date + +cpanm Foo~1.0 +=> status: latest, maturity: released, module.version_numified: gte: 1.0 + +cpanm --dev Foo~1.0 +-> status: -backpan, module.version_numified: gte: 1.0, sort_by: version_numified,date + +cpanm Foo~<2 +=> maturity: released, module.version_numified: lt: 2, sort_by: status,version_numified,date + +cpanm --dev Foo~<2 +=> status: -backpan, module.version_numified: lt: 2, sort_by: status,version_numified,date + + $release->find_download_url( 'Foo', { version => $version, dev => 0|1 }); + +Sorting: + + if it's stable: + prefer latest > cpan > backpan + then sort by version desc + then sort by date descending (rev chron) + + if it's dev: + sort by version desc + sort by date descending (reverse chronologically) + + +=cut + +sub find_download_url { + my ( $self, $module, $args ) = @_; + $args ||= {}; + + my $dev = $args->{dev}; + my $version = $args->{version}; + my $explicit_version = $version && $version =~ /==/; + + my @filters; + if ( !$explicit_version ) { + push @filters, { not => { term => { status => 'backpan' } } }; + if ( !$dev ) { + push @filters, { term => { maturity => 'released' } }; + } + } + + my $version_filters = $self->_version_filters($version); + + # filters to be applied to the nested modules + my $module_f = { + nested => { + path => 'module', + inner_hits => { _source => 'version' }, + filter => { + bool => { + must => [ + { term => { 'module.authorized' => 1 } }, + { term => { 'module.indexed' => 1 } }, + { term => { 'module.name' => $module } }, + ( + exists $version_filters->{must} + ? @{ $version_filters->{must} } + : () + ) + ], + ( + exists $version_filters->{must_not} + ? ( must_not => [ $version_filters->{must_not} ] ) + : () + ) + } + } + } + }; + + my $filter + = @filters + ? { bool => { must => [ @filters, $module_f ] } } + : $module_f; + + # sort by score, then version desc, then date desc + my @sort = ( + '_score', + { + 'module.version_numified' => { + mode => 'max', + order => 'desc', + nested_path => 'module', + nested_filter => $module_f->{nested}{filter} + } + }, + { date => { order => 'desc' } } + ); + + my $query; + + if ($dev) { + $query = { filtered => { filter => $filter } }; + } + else { + # if not dev, then prefer latest > cpan > backpan + $query = { + function_score => { + filter => $filter, + score_mode => 'first', + boost_mode => 'replace', + functions => [ + { + filter => { term => { status => 'latest' } }, + weight => 3 + }, + { + filter => { term => { status => 'cpan' } }, + weight => 2 + }, + { filter => { match_all => {} }, weight => 1 }, + ] + } + }; + } + + my $body = { + query => $query, + size => 1, + sort => \@sort, + _source => [ 'release', 'download_url', 'date', 'status' ], + search_type => 'dfs_query_then_fetch', + }; + + my $ret = $self->es->search( + index => $self->index_name, + type => 'file', + body => $body, + ); + + return unless $res->{hits}{total}; + + my @checksums; + + my $hit = $res->{hits}{hits}[0]; + my $release = exists $hit->{_source} ? $hit->{_source}{release} : undef; + + if ($release) { + my $checksums = $self->get_checksums($release); + @checksums = ( + ( + $checksums->{checksum_md5} + ? ( checksum_md5 => $checksums->{checksum_md5} ) + : () + ), + ( + $checksums->{checksum_sha256} + ? ( checksum_sha256 => $checksums->{checksum_sha256} ) + : () + ), + ); + } + + return +{ + %{ $hit->{_source} }, + %{ $hit->{inner_hits}{module}{hits}{hits}[0]{_source} }, @checksums, + }; +} + +sub _version_filters { + my ( $self, $version ) = @_; + + return () unless $version; + + if ( $version =~ s/^==\s*// ) { + return +{ + must => [ { + term => { + 'module.version_numified' => $self->_numify($version) + } + } ] + }; + } + elsif ( $version =~ /^[<>!]=?\s*/ ) { + my %ops = qw(< lt <= lte > gt >= gte); + my ( %filters, %range, @exclusion ); + my @requirements = split /,\s*/, $version; + for my $r (@requirements) { + if ( $r =~ s/^([<>]=?)\s*// ) { + $range{ $ops{$1} } = $self->_numify($r); + } + elsif ( $r =~ s/\!=\s*// ) { + push @exclusion, $self->_numify($r); + } + } + + if ( keys %range ) { + $filters{must} + = [ { range => { 'module.version_numified' => \%range } } ]; + } + + if (@exclusion) { + $filters{must_not} = []; + push @{ $filters{must_not} }, map { + +{ + term => { + 'module.version_numified' => $self->_numify($_) + } + } + } @exclusion; + } + + return \%filters; + } + elsif ( $version !~ /\s/ ) { + return +{ + must => [ { + range => { + 'module.version_numified' => + { 'gte' => $self->_numify($version) } + }, + } ] + }; + } +} + +sub _numify { + my ( $self, $ver ) = @_; + $ver =~ s/_//g; + version->new($ver)->numify; +} + __PACKAGE__->meta->make_immutable; 1; From 804b967250750f910071b73fd57827186f2fedd2 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Thu, 25 Apr 2024 13:04:10 +0100 Subject: [PATCH 2/3] extend find_download_url to be able to query dists rather than modules --- lib/MetaCPAN/Query/Release.pm | 134 ++++++++++-------- .../Server/Controller/Search/DownloadURL.pm | 4 +- 2 files changed, 76 insertions(+), 62 deletions(-) diff --git a/lib/MetaCPAN/Query/Release.pm b/lib/MetaCPAN/Query/Release.pm index 77b189e6a..70e443859 100644 --- a/lib/MetaCPAN/Query/Release.pm +++ b/lib/MetaCPAN/Query/Release.pm @@ -1012,7 +1012,7 @@ cpanm Foo~<2 cpanm --dev Foo~<2 => status: -backpan, module.version_numified: lt: 2, sort_by: status,version_numified,date - $release->find_download_url( 'Foo', { version => $version, dev => 0|1 }); + $release->find_download_url( 'module', 'Foo', { version => $version, dev => 0|1 }); Sorting: @@ -1029,7 +1029,7 @@ Sorting: =cut sub find_download_url { - my ( $self, $module, $args ) = @_; + my ( $self, $type, $module, $args ) = @_; $args ||= {}; my $dev = $args->{dev}; @@ -1037,6 +1037,11 @@ sub find_download_url { my $explicit_version = $version && $version =~ /==/; my @filters; + + die 'type must be module or dist' + unless $type eq 'module' || $type eq 'dist'; + my $module_filter = $type eq 'module'; + if ( !$explicit_version ) { push @filters, { not => { term => { status => 'backpan' } } }; if ( !$dev ) { @@ -1044,53 +1049,65 @@ sub find_download_url { } } - my $version_filters = $self->_version_filters($version); + my $prefix = $module_filter ? 'module.' : ''; + + my $version_filters + = $self->_version_filters( $version, $prefix . 'version_numified' ); + + my $entity_filter = { + bool => { + must => [ + { term => { $prefix . 'authorized' => 1 } }, + { term => { $prefix . 'indexed' => 1 } }, + { term => { $prefix . 'name' => $module } }, + ( + exists $version_filters->{must} + ? @{ $version_filters->{must} } + : () + ) + ], + ( + exists $version_filters->{must_not} + ? ( must_not => [ $version_filters->{must_not} ] ) + : () + ) + } + }; # filters to be applied to the nested modules - my $module_f = { - nested => { - path => 'module', - inner_hits => { _source => 'version' }, - filter => { - bool => { - must => [ - { term => { 'module.authorized' => 1 } }, - { term => { 'module.indexed' => 1 } }, - { term => { 'module.name' => $module } }, - ( - exists $version_filters->{must} - ? @{ $version_filters->{must} } - : () - ) - ], - ( - exists $version_filters->{must_not} - ? ( must_not => [ $version_filters->{must_not} ] ) - : () - ) - } + if ($module_filter) { + push @filters, + { + nested => { + path => 'module', + inner_hits => { _source => 'version' }, + filter => $entity_filter, } - } - }; + }; + } + else { + push @filters, $entity_filter; + } my $filter = @filters - ? { bool => { must => [ @filters, $module_f ] } } - : $module_f; + ? { bool => { must => \@filters } } + : $filters[0]; + + my $version_sort + = $module_filter + ? { + 'module.version_numified' => { + mode => 'max', + order => 'desc', + nested_path => 'module', + nested_filter => $entity_filter, + } + } + : { version_numified => { order => 'desc' } }; # sort by score, then version desc, then date desc - my @sort = ( - '_score', - { - 'module.version_numified' => { - mode => 'max', - order => 'desc', - nested_path => 'module', - nested_filter => $module_f->{nested}{filter} - } - }, - { date => { order => 'desc' } } - ); + my @sort = ( '_score', $version_sort, { date => { order => 'desc' } } ); my $query; @@ -1120,17 +1137,17 @@ sub find_download_url { } my $body = { - query => $query, - size => 1, - sort => \@sort, + query => $query, + size => 1, + sort => \@sort, _source => [ 'release', 'download_url', 'date', 'status' ], - search_type => 'dfs_query_then_fetch', }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'file', - body => $body, + my $res = $self->es->search( + index => $self->index_name, + type => $module_filter ? 'file' : 'release', + body => $body, + search_type => 'dfs_query_then_fetch', ); return unless $res->{hits}{total}; @@ -1163,7 +1180,7 @@ sub find_download_url { } sub _version_filters { - my ( $self, $version ) = @_; + my ( $self, $version, $field ) = @_; return () unless $version; @@ -1171,7 +1188,7 @@ sub _version_filters { return +{ must => [ { term => { - 'module.version_numified' => $self->_numify($version) + $field => $self->_numify($version) } } ] }; @@ -1191,18 +1208,14 @@ sub _version_filters { if ( keys %range ) { $filters{must} - = [ { range => { 'module.version_numified' => \%range } } ]; + = [ { range => { $field => \%range } } ]; } if (@exclusion) { $filters{must_not} = []; - push @{ $filters{must_not} }, map { - +{ - term => { - 'module.version_numified' => $self->_numify($_) - } - } - } @exclusion; + push @{ $filters{must_not} }, + map { +{ term => { $field => $self->_numify($_) } } } + @exclusion; } return \%filters; @@ -1211,8 +1224,7 @@ sub _version_filters { return +{ must => [ { range => { - 'module.version_numified' => - { 'gte' => $self->_numify($version) } + $field => { 'gte' => $self->_numify($version) } }, } ] }; diff --git a/lib/MetaCPAN/Server/Controller/Search/DownloadURL.pm b/lib/MetaCPAN/Server/Controller/Search/DownloadURL.pm index 9ce7c8f2d..f52e3b92d 100644 --- a/lib/MetaCPAN/Server/Controller/Search/DownloadURL.pm +++ b/lib/MetaCPAN/Server/Controller/Search/DownloadURL.pm @@ -13,8 +13,10 @@ has '+type' => ( default => 'file' ); sub get : Local : Path('/download_url') : Args(1) { my ( $self, $c, $module ) = @_; + my $type = 'module'; my $data - = $self->model($c)->find_download_url( $module, $c->req->params ); + = $self->model($c) + ->find_download_url( $type, $module, $c->req->params ); return $c->detach( '/not_found', [] ) unless $data; $c->stash($data); } From 0d850ff7abaead7dd4e7ac3785c849a4fd364401 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Thu, 25 Apr 2024 13:06:25 +0100 Subject: [PATCH 3/3] download_url end point should query releases for "perl" --- lib/MetaCPAN/Server/Controller/Search/DownloadURL.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MetaCPAN/Server/Controller/Search/DownloadURL.pm b/lib/MetaCPAN/Server/Controller/Search/DownloadURL.pm index f52e3b92d..1b16ab609 100644 --- a/lib/MetaCPAN/Server/Controller/Search/DownloadURL.pm +++ b/lib/MetaCPAN/Server/Controller/Search/DownloadURL.pm @@ -13,7 +13,7 @@ has '+type' => ( default => 'file' ); sub get : Local : Path('/download_url') : Args(1) { my ( $self, $c, $module ) = @_; - my $type = 'module'; + my $type = $module eq 'perl' ? 'dist' : 'module'; my $data = $self->model($c) ->find_download_url( $type, $module, $c->req->params );