// app/api/seo-check/route.js
// app/api/seo-check/route.js
|
|
import { NextResponse } from "next/server";
|
|
import puppeteer from "puppeteer-core";
|
|
|
|
export async function POST(request) {
|
|
const startTime = Date.now();
|
|
|
|
try {
|
|
// Validate request
|
|
let url;
|
|
try {
|
|
const body = await request.json();
|
|
url = body?.url;
|
|
if (!url) throw new Error("URL is required");
|
|
} catch (e) {
|
|
return NextResponse.json(
|
|
{ error: "Invalid request format" },
|
|
{ status: 400 }
|
|
);
|
|
}
|
|
|
|
// Validate URL format
|
|
let parsedUrl;
|
|
try {
|
|
parsedUrl = new URL(url);
|
|
if (!["http:", "https:"].includes(parsedUrl.protocol)) {
|
|
throw new Error("Invalid protocol");
|
|
}
|
|
} catch (e) {
|
|
return NextResponse.json(
|
|
{ error: "Please provide a valid HTTP/HTTPS URL" },
|
|
{ status: 400 }
|
|
);
|
|
}
|
|
|
|
// Launch Puppeteer browser
|
|
let browser;
|
|
try {
|
|
browser = await puppeteer.launch({
|
|
executablePath: process.env.NEXT_CHROMIUM_PATH,
|
|
headless: "new",
|
|
args: [
|
|
"--no-sandbox",
|
|
"--disable-setuid-sandbox",
|
|
"--disable-dev-shm-usage",
|
|
],
|
|
});
|
|
} catch (e) {
|
|
return NextResponse.json(
|
|
{ error: "Failed to initialize browser" },
|
|
{ status: 500 }
|
|
);
|
|
}
|
|
|
|
let page;
|
|
let finalUrl;
|
|
let html;
|
|
let securityHeaders = {};
|
|
let response;
|
|
|
|
try {
|
|
page = await browser.newPage();
|
|
|
|
// Set user agent and headers
|
|
await page.setUserAgent(
|
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
|
);
|
|
|
|
// Listen for response to capture headers
|
|
page.on("response", async (res) => {
|
|
if (res.url() === url || res.url() === finalUrl) {
|
|
response = res;
|
|
securityHeaders = {
|
|
https: res.url().startsWith("https://"),
|
|
xFrameOptions: res.headers()["x-frame-options"],
|
|
xXSSProtection: res.headers()["x-xss-protection"],
|
|
contentTypeOptions: res.headers()["x-content-type-options"],
|
|
strictTransportSecurity: res.headers()["strict-transport-security"],
|
|
};
|
|
}
|
|
});
|
|
|
|
// Navigate to the page with timeout
|
|
await page.goto(url, {
|
|
waitUntil: "networkidle2",
|
|
timeout: 15000,
|
|
});
|
|
|
|
// Get final URL after potential redirects
|
|
finalUrl = page.url();
|
|
|
|
// Get the full HTML content
|
|
html = await page.content();
|
|
} catch (e) {
|
|
await browser.close();
|
|
return NextResponse.json(
|
|
{
|
|
error:
|
|
e.name === "TimeoutError"
|
|
? "Page load timed out"
|
|
: `Failed to load page: ${e.message}`,
|
|
},
|
|
{ status: 400 }
|
|
);
|
|
}
|
|
|
|
// Use Puppeteer's DOM methods for analysis
|
|
const analysis = {
|
|
url: finalUrl,
|
|
pageLoadTime: (Date.now() - startTime) / 1000,
|
|
title: await analyzeTitle(page),
|
|
meta: {
|
|
description: await analyzeMetaDescription(page),
|
|
robots: await analyzeMetaRobots(page),
|
|
viewport: await analyzeViewport(page),
|
|
charset: await analyzeCharset(page),
|
|
keywords: await getMetaContent(page, "keywords"),
|
|
},
|
|
headings: await analyzeHeadings(page),
|
|
images: await analyzeImages(page),
|
|
links: await analyzeLinks(page, finalUrl),
|
|
content: await analyzeContent(page),
|
|
technical: {
|
|
canonical: await analyzeCanonical(page),
|
|
language: await analyzeLanguage(page),
|
|
schemaMarkup: await analyzeSchemaMarkup(page),
|
|
doctype: await analyzeDoctype(page),
|
|
},
|
|
social: {
|
|
openGraph: {
|
|
title: await getMetaContent(page, "og:title"),
|
|
description: await getMetaContent(page, "og:description"),
|
|
image: await getMetaContent(page, "og:image"),
|
|
url: await getMetaContent(page, "og:url"),
|
|
},
|
|
twitterCard: {
|
|
card: await getMetaContent(page, "twitter:card"),
|
|
title: await getMetaContent(page, "twitter:title"),
|
|
description: await getMetaContent(page, "twitter:description"),
|
|
image: await getMetaContent(page, "twitter:image"),
|
|
},
|
|
},
|
|
security: securityHeaders,
|
|
analyzedAt: new Date().toISOString(),
|
|
};
|
|
|
|
await browser.close();
|
|
return NextResponse.json(analysis);
|
|
} catch (error) {
|
|
console.error("SEO analysis error:", error);
|
|
return NextResponse.json(
|
|
{
|
|
error: "Internal server error during analysis",
|
|
details:
|
|
process.env.NODE_ENV === "development" ? error.stack : undefined,
|
|
},
|
|
{ status: 500 }
|
|
);
|
|
}
|
|
}
|
|
|
|
// Helper function to get meta content
|
|
async function getMetaContent(page, nameOrProperty) {
|
|
try {
|
|
return await page.$eval(
|
|
`meta[name="${nameOrProperty}"], meta[property="${nameOrProperty}"]`,
|
|
(el) => (el ? el.getAttribute("content") : null)
|
|
);
|
|
} catch {
|
|
return null;
|
|
}
|
|
}
|
|
|
|
// Analysis functions using Puppeteer's DOM API
|
|
async function analyzeTitle(page) {
|
|
const title = await page.title();
|
|
return {
|
|
exists: title.length > 0,
|
|
text: title,
|
|
length: title.length,
|
|
status:
|
|
title.length >= 30 && title.length <= 60
|
|
? "optimal"
|
|
: title.length < 30
|
|
? "too_short"
|
|
: "too_long",
|
|
};
|
|
}
|
|
|
|
async function analyzeMetaDescription(page) {
|
|
const desc = (await getMetaContent(page, "description")) || "";
|
|
return {
|
|
exists: desc.length > 0,
|
|
text: desc,
|
|
length: desc.length,
|
|
status:
|
|
desc.length >= 50 && desc.length <= 160
|
|
? "optimal"
|
|
: desc.length < 50
|
|
? "too_short"
|
|
: "too_long",
|
|
};
|
|
}
|
|
|
|
async function analyzeMetaRobots(page) {
|
|
const content = (await getMetaContent(page, "robots")) || "";
|
|
return {
|
|
exists: content.length > 0,
|
|
content,
|
|
noindex: content.includes("noindex"),
|
|
nofollow: content.includes("nofollow"),
|
|
};
|
|
}
|
|
|
|
async function analyzeViewport(page) {
|
|
const viewport = (await getMetaContent(page, "viewport")) || "";
|
|
return {
|
|
exists: viewport.length > 0,
|
|
content: viewport,
|
|
mobileFriendly: viewport.includes("width=device-width"),
|
|
};
|
|
}
|
|
|
|
async function analyzeCharset(page) {
|
|
try {
|
|
// Check meta charset
|
|
const charsetMeta = await page.$eval("meta[charset]", (el) =>
|
|
el.getAttribute("charset")
|
|
);
|
|
if (charsetMeta) {
|
|
return {
|
|
exists: true,
|
|
value: charsetMeta.toUpperCase(),
|
|
declaredInMeta: true,
|
|
};
|
|
}
|
|
|
|
// Check http-equiv
|
|
const httpEquiv = await page.$eval(
|
|
'meta[http-equiv="Content-Type"]',
|
|
(el) => el.getAttribute("content")
|
|
);
|
|
if (httpEquiv) {
|
|
const charsetMatch = httpEquiv.match(/charset=([^;]+)/i);
|
|
if (charsetMatch) {
|
|
return {
|
|
exists: true,
|
|
value: charsetMatch[1].toUpperCase(),
|
|
declaredInMeta: true,
|
|
};
|
|
}
|
|
}
|
|
|
|
return {
|
|
exists: false,
|
|
value: null,
|
|
declaredInMeta: false,
|
|
};
|
|
} catch {
|
|
return {
|
|
exists: false,
|
|
value: null,
|
|
declaredInMeta: false,
|
|
};
|
|
}
|
|
}
|
|
|
|
async function analyzeHeadings(page) {
|
|
const getHeadingTexts = async (selector) => {
|
|
return page.$$eval(selector, (els) =>
|
|
els.map((el) => el.textContent.trim())
|
|
);
|
|
};
|
|
|
|
return {
|
|
h1: {
|
|
count: await page.$$eval("h1", (els) => els.length),
|
|
texts: await getHeadingTexts("h1"),
|
|
},
|
|
h2: {
|
|
count: await page.$$eval("h2", (els) => els.length),
|
|
texts: await getHeadingTexts("h2"),
|
|
},
|
|
h3: {
|
|
count: await page.$$eval("h3", (els) => els.length),
|
|
texts: await getHeadingTexts("h3"),
|
|
},
|
|
};
|
|
}
|
|
|
|
async function analyzeImages(page) {
|
|
const images = await page.$$("img");
|
|
const withAlt = await page.$$eval(
|
|
"img",
|
|
(imgs) => imgs.filter((img) => img.alt && img.alt.trim() !== "").length
|
|
);
|
|
|
|
return {
|
|
total: images.length,
|
|
withAlt,
|
|
withoutAlt: images.length - withAlt,
|
|
percentageWithAlt:
|
|
images.length > 0 ? Math.round((withAlt / images.length) * 100) : 100,
|
|
};
|
|
}
|
|
|
|
async function analyzeLinks(page, baseUrl) {
|
|
const links = await page.$$("a[href]");
|
|
let internal = 0;
|
|
let external = 0;
|
|
let nofollow = 0;
|
|
|
|
try {
|
|
const baseDomain = new URL(baseUrl).hostname.replace("www.", "");
|
|
|
|
for (const link of links) {
|
|
const href = await link.evaluate((el) => el.getAttribute("href"));
|
|
const rel = await link.evaluate((el) => el.getAttribute("rel") || "");
|
|
|
|
if (rel.includes("nofollow")) nofollow++;
|
|
|
|
try {
|
|
const url = new URL(href, baseUrl);
|
|
if (url.hostname.replace("www.", "") === baseDomain) {
|
|
internal++;
|
|
} else {
|
|
external++;
|
|
}
|
|
} catch {
|
|
internal++; // Relative links
|
|
}
|
|
}
|
|
} catch (e) {
|
|
console.error("Link analysis error:", e);
|
|
}
|
|
|
|
return {
|
|
total: links.length,
|
|
internal,
|
|
external,
|
|
nofollow,
|
|
nofollowPercentage:
|
|
links.length > 0 ? Math.round((nofollow / links.length) * 100) : 0,
|
|
};
|
|
}
|
|
|
|
async function analyzeCanonical(page) {
|
|
const canonical =
|
|
(await page
|
|
.$eval('link[rel="canonical"]', (el) =>
|
|
el ? el.getAttribute("href") : null
|
|
)
|
|
.catch(() => null)) || "";
|
|
|
|
const ogUrl = (await getMetaContent(page, "og:url")) || "";
|
|
|
|
return {
|
|
exists: canonical.length > 0,
|
|
url: canonical,
|
|
isSelf: canonical === ogUrl,
|
|
};
|
|
}
|
|
|
|
async function analyzeSchemaMarkup(page) {
|
|
const schemas = await page.$$('script[type="application/ld+json"]');
|
|
const types = [];
|
|
|
|
for (const schema of schemas) {
|
|
try {
|
|
const jsonText = await schema.evaluate((el) => el.textContent);
|
|
const json = JSON.parse(jsonText);
|
|
if (json["@type"]) types.push(json["@type"]);
|
|
} catch (e) {
|
|
console.error("Schema parsing error:", e);
|
|
}
|
|
}
|
|
|
|
return {
|
|
count: schemas.length,
|
|
types: [...new Set(types)], // Unique types only
|
|
};
|
|
}
|
|
|
|
async function analyzeLanguage(page) {
|
|
return page.$eval("html", (el) => el.getAttribute("lang")).catch(() => null);
|
|
}
|
|
|
|
async function analyzeDoctype(page) {
|
|
return page.evaluate(() => {
|
|
const doctype = document.doctype;
|
|
return doctype
|
|
? `<!DOCTYPE ${doctype.name}` +
|
|
(doctype.publicId ? ` PUBLIC "${doctype.publicId}"` : "") +
|
|
(doctype.systemId ? ` "${doctype.systemId}"` : "") +
|
|
">"
|
|
: null;
|
|
});
|
|
}
|
|
|
|
function calculateReadabilityScore(text) {
|
|
// Simple readability score calculation (Flesch-Kincaid approximation)
|
|
const words = text
|
|
.trim()
|
|
.split(/\s+/)
|
|
.filter((word) => word.length > 0);
|
|
const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 0);
|
|
const syllables = words.reduce(
|
|
(count, word) => count + countSyllables(word),
|
|
0
|
|
);
|
|
|
|
if (words.length === 0 || sentences.length === 0) return 0;
|
|
|
|
const wordsPerSentence = words.length / sentences.length;
|
|
const syllablesPerWord = syllables / words.length;
|
|
|
|
// Flesch Reading Ease Score
|
|
const score = 206.835 - 1.015 * wordsPerSentence - 84.6 * syllablesPerWord;
|
|
|
|
// Normalize to 0-100 scale
|
|
return Math.max(0, Math.min(100, Math.round(score)));
|
|
}
|
|
|
|
function countSyllables(word) {
|
|
// Simple syllable counting approximation
|
|
word = word.toLowerCase().replace(/[^a-z]/g, "");
|
|
if (word.length <= 3) return 1;
|
|
|
|
let syllables = word.replace(/[^aeiouy]/g, "").length;
|
|
syllables -= word.match(/e$/) ? 1 : 0; // Silent e
|
|
syllables -= word.match(/[aeiouy]{2,}/g)?.length || 0; // Diphthongs
|
|
return Math.max(1, syllables);
|
|
}
|
|
|
|
async function analyzeContent(page) {
|
|
const bodyText = await page.$eval("body", (el) => el.textContent);
|
|
const words = bodyText
|
|
.trim()
|
|
.split(/\s+/)
|
|
.filter((word) => word.length > 0);
|
|
|
|
const paragraphs = await page.$$("p");
|
|
const lists = await page.$$("ul, ol");
|
|
|
|
return {
|
|
wordCount: words.length,
|
|
textLength: bodyText.length,
|
|
readability: calculateReadabilityScore(bodyText),
|
|
paragraphCount: paragraphs.length,
|
|
listCount: lists.length,
|
|
};
|
|
}
|
|
|
|
export const dynamic = "force-dynamic";
|