diff --git a/README.md b/README.md index 66eb327a..8ef574af 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,17 @@ See 'wp help msm-sitemap ' for more information on a specific command. Include custom post types in the generated sitemap with the `msm_sitemap_entry_post_type` filter. +## Generate Sitemap with posts of a custom status other than 'publish' + +By default, the sitemap will only fetch posts with the status of 'publish'. To change this, use the `msm_sitemap_post_status` filter. + +``` +function example_filter_msm_sitemap_post_status( $post_status ) { + return 'my_custom_status'; +} +add_filter( 'msm_sitemap_post_status', 'example_filter_msm_sitemap_post_status', 10, 1 ); +``` + ## Filtering Sitemap URLs If you need to filter the URLs displayed in a sitemap created via the Comprehensive Sitemap plugin, there are two considerations. First, if you are filtering the individual sitemaps, which display the URLs to the articles published on a specific date, you can use the `msm_sitemap_entry` hook to filter the URLs. An example for a reverse-proxy situation is below: @@ -83,3 +94,19 @@ add_filter( 'msm_sitemap_index', function( $sitemaps ) { } ); } ); ``` + +## Customize the last modified posts query + +Use the `msm_pre_get_last_modified_posts` filter to customize the query that gets the last modified posts. + +On large sites, this filter could be leveraged to enhance query efficiency by avoiding scanning older posts that don't get updated frequently and making better use of the `type_status_date` index. 
+ +``` +function ( $query, $post_types_in, $date ) { + global $wpdb; + + $query = $wpdb->prepare( "SELECT ID, post_date FROM $wpdb->posts WHERE post_type IN ( {$post_types_in} ) AND post_status = 'publish' AND post_date >= DATE_SUB(NOW(), INTERVAL 3 MONTH) AND post_modified_gmt >= %s LIMIT 1000", $date ); + + return $query; +}; +``` diff --git a/msm-sitemap.php b/msm-sitemap.php index dd90907c..ec371b86 100644 --- a/msm-sitemap.php +++ b/msm-sitemap.php @@ -126,7 +126,7 @@ public static function ajax_get_sitemap_counts() { $data = array( 'total_indexed_urls' => number_format( Metro_Sitemap::get_total_indexed_url_count() ), - 'total_sitemaps' => number_format( Metro_Sitemap::count_sitemaps() ), + 'total_sitemaps' => number_format( Metro_Sitemap::count_sitemaps() ), 'sitemap_indexed_urls' => self::get_recent_sitemap_url_counts( $n ), ); @@ -158,7 +158,7 @@ public static function render_sitemap_options_page() { if ( isset( $_POST['action'] ) ) { check_admin_referer( 'msm-sitemap-action' ); foreach ( $actions as $slug => $action ) { - if ( $action['text'] !== $_POST['action'] ) continue; + if ( $action['text'] !== $_POST['action'] ) continue; do_action( 'msm_sitemap_action-' . $slug ); break; @@ -325,14 +325,36 @@ public static function disable_canonical_redirects_for_sitemap_xml( $redirect_ur return $redirect_url; } + /** + * Hook allows developers to extend the sitemap functionality easily and integrate their custom post statuses. + * + * Rather than having to modify the plugin code, developers can use this filter to add their custom post statuses. 
+ * + * @since 1.4.3 + * + */ + public static function get_post_status(): string { + $default_status = 'publish'; + $post_status = apply_filters('msm_sitemap_post_status', $default_status); + + $allowed_statuses = get_post_stati(); + + if (!in_array($post_status, $allowed_statuses)) { + $post_status = $default_status; + } + + return $post_status; + } + /** * Return range of years for posts in the database * @return int[] valid years */ public static function get_post_year_range() { global $wpdb; + $post_status = self::get_post_status(); - $oldest_post_date_year = $wpdb->get_var( "SELECT DISTINCT YEAR(post_date) as year FROM $wpdb->posts WHERE post_status = 'publish' AND post_date > 0 ORDER BY year ASC LIMIT 1" ); + $oldest_post_date_year = $wpdb->get_var( $wpdb->prepare( "SELECT DISTINCT YEAR(post_date) as year FROM $wpdb->posts WHERE post_status = %s AND post_date > 0 ORDER BY year ASC LIMIT 1", $post_status ) ); if ( null !== $oldest_post_date_year ) { $current_year = date( 'Y' ); @@ -383,9 +405,10 @@ public static function date_range_has_posts( $start_date, $end_date ) { $start_date .= ' 00:00:00'; $end_date .= ' 23:59:59'; + $post_status = self::get_post_status(); $post_types_in = self::get_supported_post_types_in(); - return $wpdb->get_var( $wpdb->prepare( "SELECT ID FROM $wpdb->posts WHERE post_status = 'publish' AND post_date >= %s AND post_date <= %s AND post_type IN ( {$post_types_in} ) LIMIT 1", $start_date, $end_date ) ); + return $wpdb->get_var( $wpdb->prepare( "SELECT ID FROM $wpdb->posts WHERE post_status = %s AND post_date >= %s AND post_date <= %s AND post_type IN ( {$post_types_in} ) LIMIT 1", $post_status, $start_date, $end_date ) ); } /** @@ -398,11 +421,12 @@ public static function date_range_has_posts( $start_date, $end_date ) { public static function get_post_ids_for_date( $sitemap_date, $limit = 500 ) { global $wpdb; + $post_status = self::get_post_status(); $start_date = $sitemap_date . ' 00:00:00'; $end_date = $sitemap_date . 
' 23:59:59'; $post_types_in = self::get_supported_post_types_in(); - $posts = $wpdb->get_results( $wpdb->prepare( "SELECT ID, post_date FROM $wpdb->posts WHERE post_status = 'publish' AND post_date >= %s AND post_date <= %s AND post_type IN ( {$post_types_in} ) LIMIT %d", $start_date, $end_date, $limit ) ); + $posts = $wpdb->get_results( $wpdb->prepare( "SELECT ID, post_date FROM $wpdb->posts WHERE post_status = %s AND post_date >= %s AND post_date <= %s AND post_type IN ( {$post_types_in} ) LIMIT %d", $post_status, $start_date, $end_date, $limit ) ); usort( $posts, array( __CLASS__ , 'order_by_post_date' ) ); @@ -427,7 +451,7 @@ public static function generate_sitemap_for_date( $sitemap_date ) { 'post_name' => $sitemap_name, 'post_title' => $sitemap_name, 'post_type' => self::SITEMAP_CPT, - 'post_status' => 'publish', + 'post_status' => self::get_post_status(), 'post_date' => $sitemap_date, ); @@ -497,7 +521,9 @@ public static function generate_sitemap_for_date( $sitemap_date ) { // TODO: add images to sitemap via tag } - // Save the sitemap + $generated_xml_string = $xml->asXML(); + + // Save the sitemap if ( $sitemap_exists ) { // Get the previous post count $previous_url_count = intval( get_post_meta( $sitemap_id, 'msm_indexed_url_count', true ) ); @@ -505,15 +531,15 @@ public static function generate_sitemap_for_date( $sitemap_date ) { // Update the total post count with the difference $total_url_count += $url_count - $previous_url_count; - update_post_meta( $sitemap_id, 'msm_sitemap_xml', $xml->asXML() ); + update_post_meta( $sitemap_id, 'msm_sitemap_xml', $generated_xml_string ); update_post_meta( $sitemap_id, 'msm_indexed_url_count', $url_count ); - do_action( 'msm_update_sitemap_post', $sitemap_id, $year, $month, $day ); + do_action( 'msm_update_sitemap_post', $sitemap_id, $year, $month, $day, $generated_xml_string, $url_count ); } else { /* Should no longer hit this */ $sitemap_id = wp_insert_post( $sitemap_data ); - add_post_meta( $sitemap_id, 
'msm_sitemap_xml', $xml->asXML() ); + add_post_meta( $sitemap_id, 'msm_sitemap_xml', $generated_xml_string ); add_post_meta( $sitemap_id, 'msm_indexed_url_count', $url_count ); - do_action( 'msm_insert_sitemap_post', $sitemap_id, $year, $month, $day ); + do_action( 'msm_insert_sitemap_post', $sitemap_id, $year, $month, $day, $generated_xml_string, $url_count ); // Update the total url count $total_url_count += $url_count; @@ -591,7 +617,20 @@ public static function get_last_modified_posts() { $post_types_in = self::get_supported_post_types_in(); - $modified_posts = $wpdb->get_results( $wpdb->prepare( "SELECT ID, post_date FROM $wpdb->posts WHERE post_type IN ( {$post_types_in} ) AND post_modified_gmt >= %s LIMIT 1000", $date ) ); + $query = $wpdb->prepare( "SELECT ID, post_date FROM $wpdb->posts WHERE post_type IN ( {$post_types_in} ) AND post_modified_gmt >= %s LIMIT 1000", $date ); + + /** + * Filter the query used to get the last modified posts. + * $wpdb->prepare() should be used for security if a new replacement query is created in the callback. + * + * @param string $query The query to use to get the last modified posts. + * @param string $post_types_in A comma-separated list of post types to include in the query. + * @param string $date The date to use as the cutoff for the query. 
+ */ + $query = apply_filters( 'msm_pre_get_last_modified_posts', $query, $post_types_in, $date ); + + $modified_posts = $wpdb->get_results( $query ); + return $modified_posts; } diff --git a/tests/test-sitemap-functions.php b/tests/test-sitemap-functions.php index 85398ca2..0a69f67d 100644 --- a/tests/test-sitemap-functions.php +++ b/tests/test-sitemap-functions.php @@ -26,7 +26,6 @@ class WP_Test_Sitemap_Functions extends WP_UnitTestCase { */ function setup(): void { $this->test_base = new MSM_SiteMap_Test(); - } /** @@ -43,6 +42,27 @@ function teardown(): void { array_map( 'wp_delete_post', array_merge( $this->test_base->posts_created, $sitemaps ) ); } + /** + * custom post_status setup + */ + public function customPostStatusSetUp() { + // register new post status. + register_post_status( 'live', array( + 'public' => true, + ) ); + + // add filter to return custom post status. + add_filter( 'msm_sitemap_post_status', array( $this, 'add_post_status_to_msm_sitemap' ) ); + + } + + /** + * custom post_status teardown + */ + public function customPostStatusTearDown() { + remove_filter( 'msm_sitemap_post_status', array( $this, 'add_post_status_to_msm_sitemap' ) ); + } + /** * Data Provider prividing map of recent variable and expected url count. * @@ -134,6 +154,32 @@ function test_get_post_year_range( $years, $range_values ) { $this->assertEquals( $range_values, count( $year_range ) ); } + /** + * Verify get_post_year_range returns proper year ranges with custom status hook + * + * @dataProvider postYearRangeDataProvider + * @param int $years Number of years. + * @param int $range_values Number of years in range. + */ + function test_get_post_year_range_custom_status_posts( $years, $range_values ) { + + // set msm_sitemap_post_status filter to custom_status. + $this->customPostStatusSetUp(); + + // Add a post for each day in the last x years. + if ( 'none' !== $years ) { + $date = strtotime( "-$years year", time() ); + $cur_day = date( 'Y', $date ) . '-' . 
date( 'm', $date ) . '-' . date( 'd', $date ) . ' 00:00:00'; + $this->test_base->create_dummy_post( $cur_day, 'live' ); + } + + $year_range = Metro_Sitemap::get_post_year_range(); + $this->assertEquals( $range_values, count( $year_range ) ); + + // remove filter. + $this->customPostStatusTearDown(); + } + /** * Verify check_year_has_posts returns only years with posts */ @@ -201,6 +247,18 @@ public function dateRangeHasPostsDataProvider() { ); } + /** + * Data Provider for date_range_has_posts + * + * @return array( str, str, boolean ) Array of Test parameters. + */ + public function dateRangeHasPostsCustomStatusDataProvider() { + return array( + array( '2016-11-01', '2016-12-15', false ), + array( '2014-12-28', '2016-05-04', true ), + ); + } + /** * Verify date_range_has_posts returns expected value * @@ -229,6 +287,38 @@ function test_date_range_has_posts( $start_date, $end_date, $has_post ) { } + /** + * Verify date_range_has_posts returns expected value with custom status hook + * + * @dataProvider dateRangeHasPostsCustomStatusDataProvider + * @param string $start_date Start Date of Range in Y-M-D format. + * @param string $end_date End Date of Range in Y-M-D format. + * @param boolean $has_post Does Range have Post. + */ + function test_date_range_has_posts_custom_status( $start_date, $end_date, $has_post ) { + // set msm_sitemap_post_status filter to custom_status. + $this->customPostStatusSetUp(); + + // 1 for 2015-10-12 in "live" status. + $this->test_base->create_dummy_post( '2015-10-12 00:00:00', 'live' ); + + // 1 for 2016-01-01. + $this->test_base->create_dummy_post( '2016-01-01 00:00:00' ); + + // 1 for 2015-06-02. + $this->test_base->create_dummy_post( '2015-06-02 00:00:00' ); + + // Validate Range result. 
+ if ( $has_post ) { + $this->assertNotNull( Metro_Sitemap::date_range_has_posts( $start_date, $end_date ) ); + } else { + $this->assertNull( Metro_Sitemap::date_range_has_posts( $start_date, $end_date ) ); + } + + $this->customPostStatusTearDown(); + + } + /** * Data Provider for get_post_ids_for_date @@ -273,4 +363,109 @@ function test_get_post_ids_for_date( $sitemap_date, $limit, $expected_count ) { } + /** + * Verify get_post_ids_for_date returns expected value with custom status hook + * + * @dataProvider postIdsForDateDataProvider + * @param string $sitemap_date Date in Y-M-D format. + * @param string $limit Max number of posts to return. + * @param int $expected_count Number of posts expected to be returned. + */ + function test_get_post_ids_for_date_custom_status( $sitemap_date, $limit, $expected_count ) { + + // set msm_sitemap_post_status filter to custom_status. + $this->customPostStatusSetUp(); + + // 1 for 2016-10-01 in "draft" status. + $this->test_base->create_dummy_post( '2016-10-01 00:00:00', 'draft' ); + + $created_post_ids = array(); + // 20 for 2016-10-02. + for ( $i = 0; $i < 20; $i ++ ) { + $hour = $i < 10 ? '0' . $i : $i; + if ( '2016-10-02' === $sitemap_date ) { + $created_post_ids[] = $this->test_base->create_dummy_post( '2016-10-02 ' . $hour . ':00:00', 'live' ); + } + } + + + $post_ids = Metro_Sitemap::get_post_ids_for_date( $sitemap_date, $limit ); + $this->assertEquals( $expected_count, count( $post_ids ) ); + $this->assertEquals( array_slice( $created_post_ids, 0, $limit ), $post_ids ); + + $this->customPostStatusTearDown(); + } + + /** + * Verify msm_sitemap_post_status filter returns expected value + */ + function test_get_post_status() { + + // set msm_sitemap_post_status filter to custom_status. 
+ $this->customPostStatusSetUp(); + + $this->assertEquals( 'live', Metro_Sitemap::get_post_status() ); + + add_filter( 'msm_sitemap_post_status', function() { + return 'bad_status'; + } ); + $this->assertEquals( 'publish', Metro_Sitemap::get_post_status() ); + + // remove filter. + remove_filter( 'msm_sitemap_post_status', function() { + return 'bad_status'; + } ); + + $this->customPostStatusTearDown(); + + } + + function add_post_status_to_msm_sitemap( $post_status ) { + return 'live'; + } + + function test_get_last_modified_posts_filter_no_change() { + $posts_before = Metro_Sitemap::get_last_modified_posts(); + $tag = 'msm_pre_get_last_modified_posts'; + + // Test no changes to query. + $function = function ( $query ) { + return $query; + }; + add_filter( $tag, $function, 10, 3 ); + $posts_after = Metro_Sitemap::get_last_modified_posts(); + remove_filter( $tag, $function ); + + $this->assertEquals( count( $posts_before ), count( $posts_after ) ); + } + + function test_get_last_modified_posts_filter_change_query() { + $posts_before = Metro_Sitemap::get_last_modified_posts(); + $tag = 'msm_pre_get_last_modified_posts'; + + // Modify query to fetch posts created in the last 3 months. + $function = function ( $query, $post_types_in, $date ) { + global $wpdb; + $query = $wpdb->prepare( "SELECT ID, post_date FROM $wpdb->posts WHERE post_type IN ( {$post_types_in} ) AND post_date >= DATE_SUB(NOW(), INTERVAL 3 MONTH) AND post_modified_gmt >= %s LIMIT 1000", $date ); + return $query; + }; + + add_filter( $tag, $function, 10, 3 ); + $posts_after_date = Metro_Sitemap::get_last_modified_posts(); + remove_filter( $tag, $function ); + + // Modify query as string to fetch only 10 posts. 
+ $limit = 10; + $function = function ( $query ) use ( $limit ) { + return str_replace( 'LIMIT 1000', "LIMIT $limit", $query ); + }; + + add_filter( $tag, $function ); + $posts_after = Metro_Sitemap::get_last_modified_posts(); + remove_filter( $tag, $function ); + + $this->assertLessThan( count( $posts_before ), count( $posts_after_date ) ); + $this->assertEquals( count( $posts_after ), $limit ); + } + }