<?php
namespace Miro_AI_SEO;
if (!defined('ABSPATH')) { exit; }

/**
 * Lightweight embeddings-style suggester using term-weight vectors.
 * No external API: builds a bag-of-words vector for title + first ~300 words.
 * Stores top terms in postmeta `_miro_embed_v1` as JSON { term => weight }.
 */
class Embeddings {

    /** Post meta key holding the JSON-encoded { term => weight } vector. */
    const META_KEY = '_miro_embed_v1';

    /** Number of leading content words folded into a vector. */
    const MAX_WORDS = 300;

    /** Number of highest-frequency terms kept per vector. */
    const MAX_TERMS = 40;

    /** -------- Utilities -------- */

    /**
     * Split text into lowercase ASCII alphanumeric tokens.
     *
     * Tags are stripped, HTML entities are decoded, bracketed shortcode-like
     * spans are removed, and every character outside [a-z0-9] acts as a
     * separator (so non-ASCII letters are dropped). Tokens of one or two
     * characters and common English stop words are discarded.
     *
     * @param string $text Raw text (may contain HTML and entities).
     * @return array List of tokens in order of appearance (duplicates kept).
     */
    private static function tokenize($text){
        // Flipped once so the per-token stop-word check is a hash lookup.
        static $stop = null;
        if ($stop === null){
            $stop = array_flip([
                'the','and','for','that','with','this','from','your','have','are','was','were','you','but','not','has','had','him','her','its','our','out','get','use','using','into','how','why','can','cant','will','wont','should','could','would','about','when','what','where','which','also','more','less','very','just','like'
            ]);
        }

        $text = wp_strip_all_tags( (string) $text );
        // Decode AFTER stripping tags: otherwise "&#8217;" / "&amp;" / "&nbsp;"
        // survive as the bogus tokens "8217" / "amp" / "nbsp".
        $text = html_entity_decode( $text, ENT_QUOTES, 'UTF-8' );
        $text = strtolower( $text );
        $text = (string) preg_replace('/\[.+?\]/', ' ', $text);        // strip shortcode patterns
        $text = (string) preg_replace('/[^a-z0-9\s]+/', ' ', $text);   // keep alnum + spaces

        $parts = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
        if (!$parts) return [];

        $tokens = [];
        foreach ($parts as $word){
            if (strlen($word) > 2 && !isset($stop[$word])){
                $tokens[] = $word;
            }
        }
        return $tokens;
    }

    /**
     * Build an L2-normalized term-frequency vector from a title and content.
     *
     * Only the first MAX_WORDS whitespace-separated words of the tag-stripped
     * content are used, and only the MAX_TERMS most frequent terms are kept.
     *
     * @param string $title   Post title.
     * @param string $content Raw post content (no `the_content` filters are
     *                        applied, to avoid recursion/slowdowns).
     * @return array Map of term => weight (rounded to 6 decimals); empty when
     *               no usable tokens are found.
     */
    private static function vectorize($title, $content){
        $plain = wp_strip_all_tags( (string) $content );
        $plain = (string) preg_replace('/\s+/', ' ', $plain);
        $words = array_slice(explode(' ', $plain), 0, self::MAX_WORDS);

        // tokenize() lowercases, so no separate case-folding is needed here.
        $tokens = self::tokenize( trim($title . ' ' . implode(' ', $words)) );
        if (!$tokens) return [];

        $tf = array_count_values($tokens);
        arsort($tf);
        $top = array_slice($tf, 0, self::MAX_TERMS, true); // preserve term keys

        $sum = 0.0;
        foreach ($top as $count){ $sum += $count * $count; }
        $norm = $sum > 0 ? sqrt($sum) : 1.0;

        $vec = [];
        foreach ($top as $term => $count){
            $vec[$term] = round($count / $norm, 6);
        }
        return $vec;
    }

    /**
     * Cosine similarity between two sparse term => weight vectors.
     *
     * @param array $a First vector.
     * @param array $b Second vector.
     * @return float Similarity; 0.0 when either vector is empty or has zero norm.
     */
    private static function cosine($a, $b){
        if (empty($a) || empty($b)) return 0.0;

        $dot = 0.0; $na = 0.0; $nb = 0.0;
        foreach ($a as $term => $weight){
            $na += $weight * $weight;
            if (isset($b[$term])) $dot += $weight * $b[$term];
        }
        foreach ($b as $weight){ $nb += $weight * $weight; }

        if ($na <= 0 || $nb <= 0) return 0.0;
        return $dot / (sqrt($na) * sqrt($nb));
    }

    /**
     * Decode a stored vector.
     *
     * @param mixed $json Raw meta value as returned by get_post_meta().
     * @return array|null Map of term => float weight, or null when the value is
     *                    missing or unusable and the vector should be rebuilt.
     *                    A validly stored empty vector decodes to [].
     */
    private static function decode_vector($json){
        if (!is_string($json) || $json === '') return null;

        $data = json_decode($json, true);
        if (!is_array($data)) return null;

        // Keep only numeric weights so cosine() never does arithmetic on
        // strings or nested arrays coming from tampered/corrupted meta.
        $vec = [];
        foreach ($data as $term => $weight){
            if (is_int($weight) || is_float($weight)){
                $vec[$term] = (float) $weight;
            }
        }

        // Entries were present but none were usable: treat as corrupted.
        if ($data && !$vec) return null;
        return $vec;
    }

    /**
     * Load the stored vector for a post, building it once if it is missing.
     *
     * A stored empty vector is returned as-is rather than rebuilt on every
     * call; it is refreshed whenever build_for_post() runs for that post.
     *
     * @param int $post_id Post ID.
     * @return array Map of term => weight (possibly empty).
     */
    private static function load_vector($post_id){
        $vec = self::decode_vector( get_post_meta($post_id, self::META_KEY, true) );
        if ($vec === null){
            self::build_for_post($post_id);
            $vec = self::decode_vector( get_post_meta($post_id, self::META_KEY, true) );
        }
        return $vec === null ? [] : $vec;
    }

    /** -------- Build / Save -------- */

    /**
     * Build + save the embedding for a post (safe to call on-demand).
     *
     * Only posts of type `post` or `page` that are not revisions are processed.
     *
     * @param int $post_id Post ID.
     * @return bool True when a non-empty vector was stored; false when the post
     *              was skipped or produced no usable terms (an empty vector is
     *              still stored in the latter case).
     */
    public static function build_for_post($post_id){
        $post = get_post($post_id);
        if (!$post) return false;
        if (wp_is_post_revision($post_id)) return false;
        if (!in_array($post->post_type, ['post','page'], true)) return false;

        $title   = get_the_title($post_id);
        $content = (string)($post->post_content ?? '');
        $vec     = self::vectorize($title, $content);

        // update_post_meta() unslashes its value; slash first so any JSON
        // backslash escapes survive the round trip.
        update_post_meta($post->ID, self::META_KEY, wp_slash( wp_json_encode($vec) ));
        return !empty($vec);
    }

    /**
     * Hook: build when a post is saved/updated.
     *
     * @param int   $post_id Post ID.
     * @param mixed $post    Unused; present to match the hook signature.
     * @param mixed $update  Unused; present to match the hook signature.
     */
    public static function on_save_post($post_id, $post, $update){
        // Delegate to the reusable builder
        self::build_for_post($post_id);
    }

    /** -------- Suggest -------- */

    /**
     * Suggest internal links for the given post.
     *
     * Compares the post's vector against the 50 most recent published
     * posts/pages and returns the most similar ones. Candidates are not
     * filtered by score, so entries with confidence 0 can be returned.
     *
     * @param int $post_id Source post ID.
     * @param int $limit   Maximum number of suggestions (minimum 1).
     * @return array List of arrays with keys post_id, title, url, confidence,
     *               suggestedAnchor, sorted by similarity descending.
     */
    public static function suggest($post_id, $limit = 10){
        $limit = max(1, intval($limit));
        $src   = self::load_vector($post_id);

        // Get candidate posts/pages (recent first)
        $q = new \WP_Query([
            'post_type'      => ['post','page'],
            'post_status'    => 'publish',
            'posts_per_page' => 50,
            'post__not_in'   => [$post_id], // phpcs:ignore WordPressVIPMinimum.Performance.WPQueryParams.PostNotIn_post__not_in
            'orderby'        => 'date',
            'order'          => 'DESC',
            'fields'         => 'ids',
            'no_found_rows'  => true,
        ]);

        $scores = [];
        foreach ($q->posts as $pid){
            $scores[] = [
                'post_id' => $pid,
                'sim'     => self::cosine($src, self::load_vector($pid)),
            ];
        }

        // Sort by similarity desc
        usort($scores, function($a, $b){
            return $b['sim'] <=> $a['sim'];
        });

        // Resolve title/permalink only for the entries actually returned.
        $out = [];
        foreach (array_slice($scores, 0, $limit) as $s){
            $title = get_the_title($s['post_id']);
            $out[] = [
                'post_id'         => $s['post_id'],
                'title'           => $title,
                'url'             => get_permalink($s['post_id']),
                'confidence'      => round($s['sim'], 2),
                'suggestedAnchor' => self::pick_anchor($title),
            ];
        }
        return $out;
    }

    /**
     * Derive a short anchor text from a post title.
     *
     * Takes the first five words longer than two characters, with punctuation
     * removed, and capitalizes them. Falls back to the capitalized title when
     * no such words remain.
     *
     * @param string $title Post title (may contain tags and HTML entities).
     * @return string Suggested anchor text.
     */
    private static function pick_anchor($title){
        $title = (string) $title;

        // Titles are typically texturized; decode so "&#8217;" does not turn
        // into the anchor word "8217".
        $plain = html_entity_decode( wp_strip_all_tags($title), ENT_QUOTES, 'UTF-8' );
        $lower = strtolower($plain);

        // Keep letters/digits of any script so accented words stay intact.
        $clean = preg_replace('/[^\p{L}\p{N}\s]+/u', ' ', $lower);
        if ($clean === null){
            // Invalid UTF-8: fall back to the ASCII-only cleanup.
            $clean = (string) preg_replace('/[^a-z0-9\s]+/', ' ', $lower);
        }

        $words = preg_split('/\s+/u', $clean, -1, PREG_SPLIT_NO_EMPTY);
        if (!$words) $words = [];

        $picked = [];
        foreach ($words as $word){
            $len = function_exists('mb_strlen') ? mb_strlen($word, 'UTF-8') : strlen($word);
            if ($len > 2) $picked[] = $word;
            if (count($picked) === 5) break;
        }

        $anchor = implode(' ', $picked);
        return ucwords($anchor !== '' ? $anchor : $title);
    }
}
