Skip to content

Commit

Permalink
contributors: fetch all contrib pauseids in one query
Browse files Browse the repository at this point in the history
Rather than looking up each contributor's PAUSE ID with a separate query
per email address, run a single query covering all contributors. This
significantly speeds up fetches for releases with large contributor lists.
  • Loading branch information
haarg committed Nov 4, 2024
1 parent acbcb5d commit e3ce2c4
Showing 1 changed file with 22 additions and 13 deletions.
35 changes: 22 additions & 13 deletions lib/MetaCPAN/Query/Release.pm
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ sub get_contributors {
$dupe ? () : $info;
} ( @$authors, @$contribs );

my %want_email;
for my $contrib (@contribs) {

# heuristic to autofill pause accounts
Expand All @@ -165,20 +166,28 @@ sub get_contributors {

}

# check if contributor's email points to a registered author
if ( !$contrib->{pauseid} ) {
for my $email ( @{ $contrib->{email} } ) {
my $check_author = $self->es->search(
es_doc_path('author'),
body => {
query => { term => { email => $email } },
size => 10,
}
);
push @{ $want_email{$_} }, $contrib
for @{ $contrib->{email} };
}

if (%want_email) {
my $check_author = $self->es->search(
es_doc_path('author'),
body => {
query => { terms => { email => [ sort keys %want_email ] } },
_source => [ 'email', 'pauseid' ],
size => 100,
},
);

if ( hit_total($check_author) ) {
$contrib->{pauseid}
= uc $check_author->{hits}{hits}[0]{_source}{pauseid};
for my $author ( @{ $check_author->{hits}{hits} } ) {
my $emails = $author->{_source}{email};
$emails = [ $emails ]
if !ref $emails;
my $pauseid = uc $author->{_source}{pauseid};
for my $email ( @$emails ) {
for my $contrib ( @{ $want_email{$email} } ) {
$contrib->{pauseid} = $pauseid;
}
}
}
Expand Down

0 comments on commit e3ce2c4

Please sign in to comment.