Skip to content

Commit

Permalink
Merge branch 'main' into feature/skip-sitemap-constant
Browse files Browse the repository at this point in the history
  • Loading branch information
renatonascalves authored Jan 30, 2024
2 parents 0f6a3b4 + f720e5e commit 87a8b24
Show file tree
Hide file tree
Showing 3 changed files with 274 additions and 13 deletions.
27 changes: 27 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,17 @@ See 'wp help msm-sitemap <command>' for more information on a specific command.

Include custom post types in the generated sitemap with the `msm_sitemap_entry_post_type` filter.

## Generate Sitemap with posts of a custom status other than 'publish'

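By default, the sitemap only includes posts with the status 'publish'. To change this, use the `msm_sitemap_post_status` filter. The filter must return a registered post status (one of the values returned by `get_post_stati()`); any other value is ignored and 'publish' is used instead.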

```
/**
 * Example callback: make the sitemap use a custom post status.
 *
 * @param string $post_status The status the plugin would otherwise use.
 * @return string The status to use instead.
 */
function example_filter_msm_sitemap_post_status( $post_status ) {
    $custom_status = 'my_custom_status';

    return $custom_status;
}

add_filter(
    'msm_sitemap_post_status',
    'example_filter_msm_sitemap_post_status',
    10,
    1
);
```

## Filtering Sitemap URLs

If you need to filter the URLs displayed in a sitemap created via the Comprehensive Sitemap plugin, there are two considerations. First, if you are filtering the individual sitemaps, which display the URLs to the articles published on a specific date, you can use the `msm_sitemap_entry` hook to filter the URLs. An example for a reverse-proxy situation is below:
Expand Down Expand Up @@ -83,3 +94,19 @@ add_filter( 'msm_sitemap_index', function( $sitemaps ) {
} );
} );
```

## Customize the last modified posts query

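Use the `msm_pre_get_last_modified_posts` filter to customize the query that gets the last modified posts. The callback receives three arguments: the prepared SQL query string, the comma-separated list of post types included in the query, and the cutoff date. It must return the SQL query to run; build any replacement query with `$wpdb->prepare()`.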

On large sites, this filter could be leveraged to enhance query efficiency by avoiding scanning older posts that don't get updated frequently and making better use of the `type_status_date` index.

```
// Register the callback on the filter. The plugin passes three arguments
// (query, post types, cutoff date), so $accepted_args must be 3.
add_filter(
    'msm_pre_get_last_modified_posts',
    function ( $query, $post_types_in, $date ) {
        global $wpdb;

        // Replace the default query with one limited to published posts from
        // the last three months. The cutoff date is bound through prepare().
        return $wpdb->prepare(
            "SELECT ID, post_date FROM $wpdb->posts WHERE post_type IN ( {$post_types_in} ) AND post_status = 'publish' AND post_date >= DATE_SUB(NOW(), INTERVAL 3 MONTH) AND post_modified_gmt >= %s LIMIT 1000",
            $date
        );
    },
    10,
    3
);
```
63 changes: 51 additions & 12 deletions msm-sitemap.php
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ public static function ajax_get_sitemap_counts() {

$data = array(
'total_indexed_urls' => number_format( Metro_Sitemap::get_total_indexed_url_count() ),
'total_sitemaps' => number_format( Metro_Sitemap::count_sitemaps() ),
'total_sitemaps' => number_format( Metro_Sitemap::count_sitemaps() ),
'sitemap_indexed_urls' => self::get_recent_sitemap_url_counts( $n ),
);

Expand Down Expand Up @@ -158,7 +158,7 @@ public static function render_sitemap_options_page() {
if ( isset( $_POST['action'] ) ) {
check_admin_referer( 'msm-sitemap-action' );
foreach ( $actions as $slug => $action ) {
if ( $action['text'] !== $_POST['action'] ) continue;
if ( $action['text'] !== $_POST['action'] ) continue;

do_action( 'msm_sitemap_action-' . $slug );
break;
Expand Down Expand Up @@ -325,14 +325,36 @@ public static function disable_canonical_redirects_for_sitemap_xml( $redirect_ur
return $redirect_url;
}

/**
 * Get the post status used by the sitemap queries.
 *
 * The default is 'publish'. Developers can supply a custom post status through
 * the `msm_sitemap_post_status` filter instead of modifying the plugin code.
 * The filtered value is only honoured when it is a string that appears in the
 * list returned by get_post_stati(); anything else falls back to the default.
 *
 * @since 1.4.3
 *
 * @return string The post status to query for.
 */
public static function get_post_status(): string {
    $default_status = 'publish';

    /**
     * Filter the post status used when building sitemaps.
     *
     * @param string $default_status The default post status ('publish').
     */
    $post_status = apply_filters( 'msm_sitemap_post_status', $default_status );

    // Compare strictly: a loose in_array() would accept non-string values such
    // as `true`, which match a registered status without being a valid one.
    if ( ! is_string( $post_status ) || ! in_array( $post_status, get_post_stati(), true ) ) {
        return $default_status;
    }

    return $post_status;
}

/**
* Return range of years for posts in the database
* @return int[] valid years
*/
public static function get_post_year_range() {
global $wpdb;
$post_status = self::get_post_status();

$oldest_post_date_year = $wpdb->get_var( "SELECT DISTINCT YEAR(post_date) as year FROM $wpdb->posts WHERE post_status = 'publish' AND post_date > 0 ORDER BY year ASC LIMIT 1" );
$oldest_post_date_year = $wpdb->get_var( $wpdb->prepare( "SELECT DISTINCT YEAR(post_date) as year FROM $wpdb->posts WHERE post_status = %s AND post_date > 0 ORDER BY year ASC LIMIT 1", $post_status ) );

if ( null !== $oldest_post_date_year ) {
$current_year = date( 'Y' );
Expand Down Expand Up @@ -383,9 +405,10 @@ public static function date_range_has_posts( $start_date, $end_date ) {

$start_date .= ' 00:00:00';
$end_date .= ' 23:59:59';
$post_status = self::get_post_status();

$post_types_in = self::get_supported_post_types_in();
return $wpdb->get_var( $wpdb->prepare( "SELECT ID FROM $wpdb->posts WHERE post_status = 'publish' AND post_date >= %s AND post_date <= %s AND post_type IN ( {$post_types_in} ) LIMIT 1", $start_date, $end_date ) );
return $wpdb->get_var( $wpdb->prepare( "SELECT ID FROM $wpdb->posts WHERE post_status = %s AND post_date >= %s AND post_date <= %s AND post_type IN ( {$post_types_in} ) LIMIT 1", $post_status, $start_date, $end_date ) );
}

/**
Expand All @@ -398,11 +421,12 @@ public static function date_range_has_posts( $start_date, $end_date ) {
public static function get_post_ids_for_date( $sitemap_date, $limit = 500 ) {
global $wpdb;

$post_status = self::get_post_status();
$start_date = $sitemap_date . ' 00:00:00';
$end_date = $sitemap_date . ' 23:59:59';
$post_types_in = self::get_supported_post_types_in();

$posts = $wpdb->get_results( $wpdb->prepare( "SELECT ID, post_date FROM $wpdb->posts WHERE post_status = 'publish' AND post_date >= %s AND post_date <= %s AND post_type IN ( {$post_types_in} ) LIMIT %d", $start_date, $end_date, $limit ) );
$posts = $wpdb->get_results( $wpdb->prepare( "SELECT ID, post_date FROM $wpdb->posts WHERE post_status = %s AND post_date >= %s AND post_date <= %s AND post_type IN ( {$post_types_in} ) LIMIT %d", $post_status, $start_date, $end_date, $limit ) );

usort( $posts, array( __CLASS__ , 'order_by_post_date' ) );

Expand All @@ -427,7 +451,7 @@ public static function generate_sitemap_for_date( $sitemap_date ) {
'post_name' => $sitemap_name,
'post_title' => $sitemap_name,
'post_type' => self::SITEMAP_CPT,
'post_status' => 'publish',
'post_status' => self::get_post_status(),
'post_date' => $sitemap_date,
);

Expand Down Expand Up @@ -497,23 +521,25 @@ public static function generate_sitemap_for_date( $sitemap_date ) {
// TODO: add images to sitemap via <image:image> tag
}

// Save the sitemap
$generated_xml_string = $xml->asXML();

// Save the sitemap
if ( $sitemap_exists ) {
// Get the previous post count
$previous_url_count = intval( get_post_meta( $sitemap_id, 'msm_indexed_url_count', true ) );

// Update the total post count with the difference
$total_url_count += $url_count - $previous_url_count;

update_post_meta( $sitemap_id, 'msm_sitemap_xml', $xml->asXML() );
update_post_meta( $sitemap_id, 'msm_sitemap_xml', $generated_xml_string );
update_post_meta( $sitemap_id, 'msm_indexed_url_count', $url_count );
do_action( 'msm_update_sitemap_post', $sitemap_id, $year, $month, $day );
do_action( 'msm_update_sitemap_post', $sitemap_id, $year, $month, $day, $generated_xml_string, $url_count );
} else {
/* Should no longer hit this */
$sitemap_id = wp_insert_post( $sitemap_data );
add_post_meta( $sitemap_id, 'msm_sitemap_xml', $xml->asXML() );
add_post_meta( $sitemap_id, 'msm_sitemap_xml', $generated_xml_string );
add_post_meta( $sitemap_id, 'msm_indexed_url_count', $url_count );
do_action( 'msm_insert_sitemap_post', $sitemap_id, $year, $month, $day );
do_action( 'msm_insert_sitemap_post', $sitemap_id, $year, $month, $day, $generated_xml_string, $url_count );

// Update the total url count
$total_url_count += $url_count;
Expand Down Expand Up @@ -591,7 +617,20 @@ public static function get_last_modified_posts() {

$post_types_in = self::get_supported_post_types_in();

$modified_posts = $wpdb->get_results( $wpdb->prepare( "SELECT ID, post_date FROM $wpdb->posts WHERE post_type IN ( {$post_types_in} ) AND post_modified_gmt >= %s LIMIT 1000", $date ) );
$query = $wpdb->prepare( "SELECT ID, post_date FROM $wpdb->posts WHERE post_type IN ( {$post_types_in} ) AND post_modified_gmt >= %s LIMIT 1000", $date );

/**
* Filter the query used to get the last modified posts.
* $wpdb->prepare() should be used for security if a new replacement query is created in the callback.
*
* @param string $query The query to use to get the last modified posts.
* @param string $post_types_in A comma-separated list of post types to include in the query.
* @param string $date The date to use as the cutoff for the query.
*/
$query = apply_filters( 'msm_pre_get_last_modified_posts', $query, $post_types_in, $date );

$modified_posts = $wpdb->get_results( $query );

return $modified_posts;
}

Expand Down
Loading

0 comments on commit 87a8b24

Please sign in to comment.