userAgent ], true ); if ( false !== $index2 && ! $allowDuplicates ) { if ( $allowOverride ) { unset( $rules1[ $userAgent ][ $index2 ] ); } else { unset( $rules2[ $userAgent ][ $index1 ] ); } } $pattern = str_replace( [ '.', '*', '?', '$' ], [ '\.', '(.*)', '\?', '\$' ], $ruleValue ); foreach ( $rules1[ $userAgent ] as $rule1 ) { $matches = []; preg_match( "#^$otherDirective: $pattern$#", $rule1, $matches ); } if ( ! empty( $matches ) && ! $allowDuplicates ) { unset( $rules2[ $userAgent ][ $index1 ] ); } } return [ $rules1, $rules2 ]; } /** * Parses a rule and extracts the directive and value. * * @since 4.4.2 * * @param string $rule The rule to parse. * @return array An array containing the parsed directive and value. */ private function parseRule( $rule ) { list( $directive, $value ) = array_map( 'trim', array_pad( explode( ':', $rule, 2 ), 2, '' ) ); return [ $directive, $value ]; } /** * Stringifies the parsed rules. * * @since 4.0.0 * @version 4.4.2 * * @param array $allRules The rules array. * @return string The stringified rules. */ private function stringifyRuleset( $allRules ) { $robots = []; foreach ( $allRules as $userAgent => $rules ) { if ( empty( $userAgent ) ) { continue; } $robots[] = "\r\nUser-agent: $userAgent"; foreach ( $rules as $rule ) { list( $directive, $value ) = $this->parseRule( $rule ); if ( empty( $directive ) || empty( $value ) ) { continue; } $robots[] = sprintf( '%s: %s', ucfirst( $directive ), $value ); } } $robots = implode( "\r\n", $robots ); $sitemapUrls = $this->getSitemapRules(); if ( ! empty( $sitemapUrls ) ) { $sitemapUrls = implode( "\r\n", $sitemapUrls ); $robots .= "\r\n\r\n$sitemapUrls"; } return trim( $robots ); } /** * Get Sitemap URLs excluding the default ones. * * @since 4.1.7 * * @return array An array of the Sitemap URLs. */ private function getSitemapRules() { $defaultSitemaps = $this->extractSitemapUrls( aioseo()->robotsTxt->getDefaultRobotsTxtContent() ); $sitemapRules = aioseo()->sitemap->helpers->getSitemapUrlsPrefixed(); return array_diff( $sitemapRules, $defaultSitemaps ); } /** * Parses the rules. * * @since 4.0.0 * @version 4.4.2 * * @param array $rules An array of rules. * @return array The rules grouped by user agent. */ private function groupRulesByUserAgent( $rules ) { $groups = []; foreach ( $rules as $rule ) { $r = json_decode( $rule, true ); if ( empty( $r['userAgent'] ) || empty( $r['fieldValue'] ) ) { continue; } if ( empty( $groups[ $r['userAgent'] ] ) ) { $groups[ $r['userAgent'] ] = []; } $groups[ $r['userAgent'] ][] = "{$r['directive']}: {$r['fieldValue']}"; } return $groups; } /** * Extract rules from a string. * * @since 4.0.0 * @version 4.4.2 * * @param string $lines The lines to extract from. * @return array An array of extracted rules. */ public function extractRules( $lines ) { $lines = array_filter( array_map( 'trim', explode( "\n", (string) $lines ) ) ); $rules = []; $userAgent = null; $prevDirective = null; $prevValue = null; $siblingsUserAgents = []; foreach ( $lines as $line ) { list( $directive, $value ) = $this->parseRule( $line ); if ( empty( $directive ) || empty( $value ) ) { continue; } $directive = strtolower( $directive ); if ( ! in_array( $directive, $this->allowedDirectives, true ) ) { continue; } $value = $this->sanitizeDirectiveValue( $directive, $value ); if ( ! $value ) { continue; } if ( 'user-agent' === $directive ) { if ( ! empty( $prevDirective ) && ! empty( $prevValue ) && 'user-agent' === $prevDirective ) { $siblingsUserAgents[] = $prevValue; } $userAgent = $value; $rules[ $userAgent ] = ! empty( $rules[ $userAgent ] ) ? $rules[ $userAgent ] : []; } else { $rules[ $userAgent ][] = "$directive: $value"; if ( $siblingsUserAgents ) { foreach ( $siblingsUserAgents as $siblingUserAgent ) { $rules[ $siblingUserAgent ] = $rules[ $userAgent ]; } $siblingsUserAgents = []; } } $prevDirective = $directive; $prevValue = $value; } return $rules; } /** * Extract sitemap URLs from a string. * * @since 4.0.10 * * @param string $lines The lines to extract from. * @return array An array of sitemap URLs. */ public function extractSitemapUrls( $lines ) { $lines = array_filter( array_map( 'trim', explode( "\n", (string) $lines ) ) ); $sitemapUrls = []; foreach ( $lines as $line ) { $array = array_map( 'trim', explode( 'sitemap:', strtolower( $line ) ) ); if ( ! empty( $array[1] ) ) { $sitemapUrls[] = trim( $line ); } } return $sitemapUrls; } /** * Sanitize the robots.txt rule directive value. * * @since 4.0.0 * @version 4.4.2 * * @param string $directive The directive. * @param string $value The value. * @return string The directive value. */ private function sanitizeDirectiveValue( $directive, $value ) { // Percent-encoded characters are stripped from our option values, so we decode. $value = rawurldecode( trim( $value ) ); if ( ! $value ) { return $value; } $value = preg_replace( '/[><]/', '', $value ); if ( 'user-agent' === $directive ) { $value = preg_replace( '/[^a-z0-9\-_*,.\s]/i', '', $value ); } if ( 'allow' === $directive || 'disallow' === $directive ) { $value = preg_replace( '/^\/+/', '/', $value ); } return $value; } /** * Check if a physical robots.txt file exists, and if it does add a notice. * * @since 4.0.0 * * @return void */ public function checkForPhysicalFiles() { if ( ! $this->hasPhysicalRobotsTxt() ) { return; } $notification = Models\Notification::getNotificationByName( 'robots-physical-file' ); if ( $notification->exists() ) { return; } Models\Notification::addNotification( [ 'slug' => uniqid(), 'notification_name' => 'robots-physical-file', 'title' => __( 'Physical Robots.txt File Detected', 'all-in-one-seo-pack' ), 'content' => sprintf( // Translators: 1 - The plugin short name ("AIOSEO"), 2 - The plugin short name ("AIOSEO"). __( '%1$s has detected a physical robots.txt file in the root folder of your WordPress installation. We recommend removing this file as it could cause conflicts with WordPress\' dynamically generated one. %2$s can import this file and delete it, or you can simply delete it.', 'all-in-one-seo-pack' ), // phpcs:ignore Generic.Files.LineLength.MaxExceeded AIOSEO_PLUGIN_SHORT_NAME, AIOSEO_PLUGIN_SHORT_NAME ), 'type' => 'error', 'level' => [ 'all' ], 'button1_label' => __( 'Import and Delete', 'all-in-one-seo-pack' ), 'button1_action' => 'http://action#tools/import-robots-txt?redirect=aioseo-tools:robots-editor', 'button2_label' => __( 'Delete', 'all-in-one-seo-pack' ), 'button2_action' => 'http://action#tools/delete-robots-txt?redirect=aioseo-tools:robots-editor', 'start' => gmdate( 'Y-m-d H:i:s' ) ] ); } /** * Import physical robots.txt file. * * @since 4.0.0 * @version 4.4.2 * * @param bool $network True if inside WordPress network administration pages. * @throws \Exception If request fails or file is not readable. * @return boolean Whether or not the file imported correctly. */ public function importPhysicalRobotsTxt( $network = false ) { try { $fs = aioseo()->core->fs; if ( ! $fs->isWpfsValid() ) { $invalid = true; } $file = trailingslashit( $fs->fs->abspath() ) . 'robots.txt'; if ( isset( $invalid ) || ! $fs->isReadable( $file ) ) { throw new \Exception( esc_html__( 'There was an error importing the static robots.txt file.', 'all-in-one-seo-pack' ) ); } $lines = trim( (string) $fs->getContents( $file ) ); if ( $lines ) { $this->importRobotsTxtFromText( $lines, $network ); } return true; } catch ( \Exception $e ) { throw new \Exception( esc_html( $e->getMessage() ) ); } } /** * Import robots.txt from a URL. * * @since 4.4.2 * * @param string $text The text to import from. * @param bool $network True if inside WordPress network administration pages. * @throws \Exception If no User-agent is found. * @return boolean Whether the file imported correctly or not. */ public function importRobotsTxtFromText( $text, $network ) { $ruleset = $this->extractRules( $text ); if ( ! key( $ruleset ) ) { throw new \Exception( esc_html__( 'No User-agent found in the content beginning.', 'all-in-one-seo-pack' ) ); } $options = aioseo()->options; if ( $network ) { $options = aioseo()->networkOptions; } $currentRules = $this->groupRulesByUserAgent( $options->tools->robots->rules ); $ruleset = $this->mergeRules( $currentRules, $ruleset, false, true ); $options->tools->robots->rules = aioseo()->robotsTxt->prepareRobotsTxt( $ruleset ); return true; } /** * Import robots.txt from a URL. * * @since 4.4.2 * * @param string $url The URL to import from. * @param bool $network True if inside WordPress network administration pages. * @throws \Exception If request fails. * @return bool Whether the import was successful or not. */ public function importRobotsTxtFromUrl( $url, $network ) { $request = wp_remote_get( $url, [ 'timeout' => 10, 'sslverify' => false ] ); $robotsTxtContent = wp_remote_retrieve_body( $request ); if ( ! $robotsTxtContent ) { throw new \Exception( esc_html__( 'There was an error importing the robots.txt content from the URL.', 'all-in-one-seo-pack' ) ); } $options = aioseo()->options; if ( $network ) { $options = aioseo()->networkOptions; } $newRules = $this->extractRules( $robotsTxtContent ); $currentRules = $this->groupRulesByUserAgent( $options->tools->robots->rules ); $newRules = $this->mergeRules( $currentRules, $newRules, false, true ); $options->tools->robots->rules = aioseo()->robotsTxt->prepareRobotsTxt( $newRules ); return true; } /** * Deletes the physical robots.txt file. * * @since 4.4.5 * * @throws \Exception If the file is not readable, or it can't be deleted. * @return true True if the file was successfully deleted. */ public function deletePhysicalRobotsTxt() { try { $fs = aioseo()->core->fs; if ( ! $fs->isWpfsValid() || ! $fs->fs->delete( trailingslashit( $fs->fs->abspath() ) . 'robots.txt' ) ) { throw new \Exception( __( 'There was an error deleting the physical robots.txt file.', 'all-in-one-seo-pack' ) ); } Models\Notification::deleteNotificationByName( 'robots-physical-file' ); return true; } catch ( \Exception $e ) { throw new \Exception( esc_html( $e->getMessage() ) ); } } /** * Prepare robots.txt rules to save. * * @since 4.1.4 * * @param array $allRules Array with the rules. * @return array The prepared rules array. */ public function prepareRobotsTxt( $allRules = [] ) { $robots = []; foreach ( $allRules as $userAgent => $rules ) { if ( empty( $userAgent ) ) { continue; } foreach ( $rules as $rule ) { list( $directive, $value ) = $this->parseRule( $rule ); if ( empty( $directive ) || empty( $value ) ) { continue; } if ( '*' === $userAgent && ( 'allow' === $directive && '/wp-admin/admin-ajax.php' === $value || 'disallow' === $directive && '/wp-admin/' === $value ) ) { continue; } $robots[] = wp_json_encode( [ 'userAgent' => $userAgent, 'directive' => $directive, 'fieldValue' => $value ] ); } } return $robots; } /** * Checks if a physical robots.txt file exists. * * @since 4.0.0 * * @return boolean True if it does, false if not. */ public function hasPhysicalRobotsTxt() { $fs = aioseo()->core->fs; if ( ! $fs->isWpfsValid() ) { return false; } $accessType = get_filesystem_method(); if ( 'direct' === $accessType ) { $file = trailingslashit( $fs->fs->abspath() ) . 'robots.txt'; return $fs->exists( $file ); } return false; } /** * Get the default Robots.txt lines (excluding our own). * * @since 4.1.7 * @version 4.4.2 * * @return string The robots.txt content rules (excluding our own). */ public function getDefaultRobotsTxtContent() { // First, we need to remove our filter, so that it doesn't run unintentionally. remove_filter( 'robots_txt', [ $this, 'buildRules' ], 10000 ); ob_start(); do_robots(); if ( is_admin() ) { header( 'Content-Type: text/html; charset=utf-8' ); } $rules = strval( ob_get_clean() ); // Add the filter back. add_filter( 'robots_txt', [ $this, 'buildRules' ], 10000 ); return $rules; } /** * A check to see if the rewrite rules are set. * This isn't perfect, but it will help us know in most cases. * * @since 4.0.0 * * @return boolean Whether the rewrite rules are set or not. */ public function rewriteRulesExist() { // If we have a physical file, it's almost impossible to tell if the rewrite rules are set. // The only scenario is if we still get a 404. $response = wp_remote_get( aioseo()->helpers->getSiteUrl() . '/robots.txt' ); if ( 299 < wp_remote_retrieve_response_code( $response ) ) { return false; } return true; } }