diff --git a/app/api/seo-check/route.js b/app/api/seo-check/route.js index 591209d..7fddc61 100644 --- a/app/api/seo-check/route.js +++ b/app/api/seo-check/route.js @@ -1,6 +1,6 @@ // app/api/seo-check/route.js import { NextResponse } from "next/server"; -import * as cheerio from "cheerio"; +import puppeteer from "puppeteer"; export async function POST(request) { const startTime = Date.now(); @@ -33,246 +33,112 @@ export async function POST(request) { ); } - // Fetch HTML with enhanced configuration - const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort(), 15000); - - let response; + // Launch Puppeteer browser + let browser; try { - response = await fetch(url, { - headers: { - "User-Agent": - "Mozilla/5.0 (compatible; SEO-Analyzer/1.0; +https://github.com)", - Accept: "text/html,application/xhtml+xml", - }, - redirect: "follow", - signal: controller.signal, + browser = await puppeteer.launch({ + headless: true, }); - clearTimeout(timeout); } catch (e) { return NextResponse.json( - { - error: - e.name === "AbortError" - ? "Request timed out" - : `Fetch failed: ${e.message}`, - }, - { status: 400 } - ); - } - - // Verify response - if (!response.ok) { - return NextResponse.json( - { - error: `HTTP ${response.status}`, - status: response.status, - url: response.url, - }, - { status: 400 } - ); - } - - const contentType = response.headers.get("content-type"); - if (!contentType?.includes("text/html")) { - return NextResponse.json( - { error: "URL does not return HTML content" }, - { status: 400 } - ); - } - - // Parse HTML - const html = await response.text(); - const finalUrl = response.url; - let $; - try { - $ = cheerio.load(html); - } catch (e) { - return NextResponse.json( - { error: "Failed to parse HTML content" }, + { error: "Failed to initialize browser" }, { status: 500 } ); } - // Extract security headers - const securityHeaders = { - https: finalUrl.startsWith("https://"), - xFrameOptions: response.headers.get("x-frame-options"), - xXSSProtection: response.headers.get("x-xss-protection"), - contentTypeOptions: response.headers.get("x-content-type-options"), - strictTransportSecurity: response.headers.get( - "strict-transport-security" - ), - }; + let page; + let finalUrl; + let html; + let securityHeaders = {}; + let response; - // Add this new function to analyze charset - function analyzeCharset($) { - const charsetMeta = $("meta[charset]"); - if (charsetMeta.length > 0) { - return { - exists: true, - value: charsetMeta.attr("charset")?.toUpperCase() || "UTF-8", - declaredInMeta: true, - }; - } + try { + page = await browser.newPage(); - const httpEquiv = $('meta[http-equiv="Content-Type"]'); - if (httpEquiv.length > 0) { - const content = httpEquiv.attr("content") || ""; - const charsetMatch = content.match(/charset=([^;]+)/i); - if (charsetMatch) { - return { - exists: true, - value: charsetMatch[1].toUpperCase(), - declaredInMeta: true, - }; - } - } - - return { - exists: false, - value: null, - declaredInMeta: false, - }; - } - - // Title Tag Analysis - function analyzeTitle($) { - const title = $("title").first().text().trim(); - return { - exists: title.length > 0, - text: title, - length: title.length, - status: - title.length >= 30 && title.length <= 60 - ? "optimal" - : title.length < 30 - ? "too_short" - : "too_long", - }; - } - - // Meta Description Analysis - function analyzeMetaDescription($) { - const desc = $('meta[name="description"]').attr("content") || ""; - return { - exists: desc.length > 0, - text: desc, - length: desc.length, - status: - desc.length >= 50 && desc.length <= 160 - ? "optimal" - : desc.length < 50 - ? "too_short" - : "too_long", - }; - } - - // Meta Robots Analysis - function analyzeMetaRobots($) { - const content = $('meta[name="robots"]').attr("content") || ""; - return { - exists: content.length > 0, - content, - noindex: content.includes("noindex"), - nofollow: content.includes("nofollow"), - }; - } - - // Viewport Analysis - function analyzeViewport($) { - const viewport = $('meta[name="viewport"]').attr("content") || ""; - return { - exists: viewport.length > 0, - content: viewport, - mobileFriendly: viewport.includes("width=device-width"), - }; - } - - // Text Analysis Functions - function calculateReadabilityScore(text) { - // Simple readability score calculation (Flesch-Kincaid approximation) - const words = text - .trim() - .split(/\s+/) - .filter((word) => word.length > 0); - const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 0); - const syllables = words.reduce( - (count, word) => count + countSyllables(word), - 0 + // Set user agent and headers + await page.setUserAgent( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" ); - if (words.length === 0 || sentences.length === 0) return 0; + // Listen for response to capture headers + page.on("response", async (res) => { + if (res.url() === url || res.url() === finalUrl) { + response = res; + securityHeaders = { + https: res.url().startsWith("https://"), + xFrameOptions: res.headers()["x-frame-options"], + xXSSProtection: res.headers()["x-xss-protection"], + contentTypeOptions: res.headers()["x-content-type-options"], + strictTransportSecurity: res.headers()["strict-transport-security"], + }; + } + }); - const wordsPerSentence = words.length / sentences.length; - const syllablesPerWord = syllables / words.length; + // Navigate to the page with timeout + await page.goto(url, { + waitUntil: "networkidle2", + timeout: 15000, + }); - // Flesch Reading Ease Score - const score = - 206.835 - 1.015 * wordsPerSentence - 84.6 * syllablesPerWord; + // Get final URL after potential redirects + finalUrl = page.url(); - // Normalize to 0-100 scale - return Math.max(0, Math.min(100, Math.round(score))); + // Get the full HTML content + html = await page.content(); + } catch (e) { + await browser.close(); + return NextResponse.json( + { + error: + e.name === "TimeoutError" + ? "Page load timed out" + : `Failed to load page: ${e.message}`, + }, + { status: 400 } + ); } - function countSyllables(word) { - // Simple syllable counting approximation - word = word.toLowerCase().replace(/[^a-z]/g, ""); - if (word.length <= 3) return 1; - - let syllables = word.replace(/[^aeiouy]/g, "").length; - syllables -= word.match(/e$/) ? 1 : 0; // Silent e - syllables -= word.match(/[aeiouy]{2,}/g)?.length || 0; // Diphthongs - return Math.max(1, syllables); - } - - // Content Analysis - function analyzeContent($) { - const bodyText = $("body").text(); - const words = bodyText - .trim() - .split(/\s+/) - .filter((word) => word.length > 0); - - return { - wordCount: words.length, - textLength: bodyText.length, - readability: calculateReadabilityScore(bodyText), - paragraphCount: $("p").length, - listCount: $("ul, ol").length, - }; - } - - // Perform comprehensive analysis + // Use Puppeteer's DOM methods for analysis const analysis = { url: finalUrl, pageLoadTime: (Date.now() - startTime) / 1000, - title: analyzeTitle($), + title: await analyzeTitle(page), meta: { - description: analyzeMetaDescription($), - robots: analyzeMetaRobots($), - viewport: analyzeViewport($), - charset: analyzeCharset($), - keywords: $('meta[name="keywords"]').attr("content") || null, + description: await analyzeMetaDescription(page), + robots: await analyzeMetaRobots(page), + viewport: await analyzeViewport(page), + charset: await analyzeCharset(page), + keywords: await getMetaContent(page, "keywords"), }, - headings: analyzeHeadings($), - images: analyzeImages($), - links: analyzeLinks($, finalUrl), - content: analyzeContent($), + headings: await analyzeHeadings(page), + images: await analyzeImages(page), + links: await analyzeLinks(page, finalUrl), + content: await analyzeContent(page), technical: { - canonical: analyzeCanonical($), - language: analyzeLanguage($), - schemaMarkup: analyzeSchemaMarkup($), - doctype: analyzeDoctype($), + canonical: await analyzeCanonical(page), + language: await analyzeLanguage(page), + schemaMarkup: await analyzeSchemaMarkup(page), + doctype: await analyzeDoctype(page), }, social: { - openGraph: analyzeOpenGraph($), - twitterCard: analyzeTwitterCards($), + openGraph: { + title: await getMetaContent(page, "og:title"), + description: await getMetaContent(page, "og:description"), + image: await getMetaContent(page, "og:image"), + url: await getMetaContent(page, "og:url"), + }, + twitterCard: { + card: await getMetaContent(page, "twitter:card"), + title: await getMetaContent(page, "twitter:title"), + description: await getMetaContent(page, "twitter:description"), + image: await getMetaContent(page, "twitter:image"), + }, }, security: securityHeaders, analyzedAt: new Date().toISOString(), }; + await browser.close(); return NextResponse.json(analysis); } catch (error) { console.error("SEO analysis error:", error); @@ -287,9 +153,21 @@ export async function POST(request) { } } -// Analysis Functions -function analyzeTitle($) { - const title = $("title").first().text().trim(); +// Helper function to get meta content +async function getMetaContent(page, nameOrProperty) { + try { + return await page.$eval( + `meta[name="${nameOrProperty}"], meta[property="${nameOrProperty}"]`, + (el) => (el ? el.getAttribute("content") : null) + ); + } catch { + return null; + } +} + +// Analysis functions using Puppeteer's DOM API +async function analyzeTitle(page) { + const title = await page.title(); return { exists: title.length > 0, text: title, @@ -303,8 +181,8 @@ function analyzeTitle($) { }; } -function analyzeMetaDescription($) { - const desc = $('meta[name="description"]').attr("content") || ""; +async function analyzeMetaDescription(page) { + const desc = (await getMetaContent(page, "description")) || ""; return { exists: desc.length > 0, text: desc, @@ -318,8 +196,8 @@ function analyzeMetaDescription($) { }; } -function analyzeMetaRobots($) { - const content = $('meta[name="robots"]').attr("content") || ""; +async function analyzeMetaRobots(page) { + const content = (await getMetaContent(page, "robots")) || ""; return { exists: content.length > 0, content, @@ -328,8 +206,8 @@ function analyzeMetaRobots($) { }; } -function analyzeViewport($) { - const viewport = $('meta[name="viewport"]').attr("content") || ""; +async function analyzeViewport(page) { + const viewport = (await getMetaContent(page, "viewport")) || ""; return { exists: viewport.length > 0, content: viewport, @@ -337,35 +215,79 @@ function analyzeViewport($) { }; } -function analyzeHeadings($) { +async function analyzeCharset(page) { + try { + // Check meta charset + const charsetMeta = await page.$eval("meta[charset]", (el) => + el.getAttribute("charset") + ); + if (charsetMeta) { + return { + exists: true, + value: charsetMeta.toUpperCase(), + declaredInMeta: true, + }; + } + + // Check http-equiv + const httpEquiv = await page.$eval( + 'meta[http-equiv="Content-Type"]', + (el) => el.getAttribute("content") + ); + if (httpEquiv) { + const charsetMatch = httpEquiv.match(/charset=([^;]+)/i); + if (charsetMatch) { + return { + exists: true, + value: charsetMatch[1].toUpperCase(), + declaredInMeta: true, + }; + } + } + + return { + exists: false, + value: null, + declaredInMeta: false, + }; + } catch { + return { + exists: false, + value: null, + declaredInMeta: false, + }; + } +} + +async function analyzeHeadings(page) { + const getHeadingTexts = async (selector) => { + return page.$$eval(selector, (els) => + els.map((el) => el.textContent.trim()) + ); + }; + return { h1: { - count: $("h1").length, - texts: $("h1") - .map((i, el) => $(el).text().trim()) - .get(), + count: await page.$$eval("h1", (els) => els.length), + texts: await getHeadingTexts("h1"), }, h2: { - count: $("h2").length, - texts: $("h2") - .map((i, el) => $(el).text().trim()) - .get(), + count: await page.$$eval("h2", (els) => els.length), + texts: await getHeadingTexts("h2"), }, h3: { - count: $("h3").length, - texts: $("h3") - .map((i, el) => $(el).text().trim()) - .get(), + count: await page.$$eval("h3", (els) => els.length), + texts: await getHeadingTexts("h3"), }, }; } -function analyzeImages($) { - const images = $("img"); - const withAlt = images.filter((i, el) => { - const alt = $(el).attr("alt"); - return alt && alt.trim() !== ""; - }).length; +async function analyzeImages(page) { + const images = await page.$$("img"); + const withAlt = await page.$$eval( + "img", + (imgs) => imgs.filter((img) => img.alt && img.alt.trim() !== "").length + ); return { total: images.length, @@ -376,8 +298,8 @@ function analyzeImages($) { }; } -function analyzeLinks($, baseUrl) { - const links = $("a[href]"); +async function analyzeLinks(page, baseUrl) { + const links = await page.$$("a[href]"); let internal = 0; let external = 0; let nofollow = 0; @@ -385,9 +307,9 @@ function analyzeLinks($, baseUrl) { try { const baseDomain = new URL(baseUrl).hostname.replace("www.", ""); - links.each((i, el) => { - const href = $(el).attr("href"); - const rel = $(el).attr("rel") || ""; + for (const link of links) { + const href = await link.evaluate((el) => el.getAttribute("href")); + const rel = await link.evaluate((el) => el.getAttribute("rel") || ""); if (rel.includes("nofollow")) nofollow++; @@ -401,7 +323,7 @@ function analyzeLinks($, baseUrl) { } catch { internal++; // Relative links } - }); + } } catch (e) { console.error("Link analysis error:", e); } @@ -416,41 +338,36 @@ function analyzeLinks($, baseUrl) { }; } -function analyzeContent($) { - const bodyText = $("body").text(); - const words = bodyText - .trim() - .split(/\s+/) - .filter((word) => word.length > 0); +async function analyzeCanonical(page) { + const canonical = + (await page + .$eval('link[rel="canonical"]', (el) => + el ? el.getAttribute("href") : null + ) + .catch(() => null)) || ""; - return { - wordCount: words.length, - textLength: bodyText.length, - readability: calculateReadabilityScore(words), // Implement your own formula - }; -} + const ogUrl = (await getMetaContent(page, "og:url")) || ""; -function analyzeCanonical($) { - const canonical = $('link[rel="canonical"]').attr("href") || ""; return { exists: canonical.length > 0, url: canonical, - isSelf: canonical === $('meta[property="og:url"]').attr("content"), + isSelf: canonical === ogUrl, }; } -function analyzeSchemaMarkup($) { - const schemas = $('script[type="application/ld+json"]'); +async function analyzeSchemaMarkup(page) { + const schemas = await page.$$('script[type="application/ld+json"]'); const types = []; - schemas.each((i, el) => { + for (const schema of schemas) { try { - const json = JSON.parse($(el).text()); + const jsonText = await schema.evaluate((el) => el.textContent); + const json = JSON.parse(jsonText); if (json["@type"]) types.push(json["@type"]); } catch (e) { console.error("Schema parsing error:", e); } - }); + } return { count: schemas.length, @@ -458,31 +375,74 @@ function analyzeSchemaMarkup($) { }; } -function analyzeOpenGraph($) { +async function analyzeLanguage(page) { + return page.$eval("html", (el) => el.getAttribute("lang")).catch(() => null); +} + +async function analyzeDoctype(page) { + return page.evaluate(() => { + const doctype = document.doctype; + return doctype + ? `" + : null; + }); +} + +function calculateReadabilityScore(text) { + // Simple readability score calculation (Flesch-Kincaid approximation) + const words = text + .trim() + .split(/\s+/) + .filter((word) => word.length > 0); + const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 0); + const syllables = words.reduce( + (count, word) => count + countSyllables(word), + 0 + ); + + if (words.length === 0 || sentences.length === 0) return 0; + + const wordsPerSentence = words.length / sentences.length; + const syllablesPerWord = syllables / words.length; + + // Flesch Reading Ease Score + const score = 206.835 - 1.015 * wordsPerSentence - 84.6 * syllablesPerWord; + + // Normalize to 0-100 scale + return Math.max(0, Math.min(100, Math.round(score))); +} + +function countSyllables(word) { + // Simple syllable counting approximation + word = word.toLowerCase().replace(/[^a-z]/g, ""); + if (word.length <= 3) return 1; + + let syllables = word.replace(/[^aeiouy]/g, "").length; + syllables -= word.match(/e$/) ? 1 : 0; // Silent e + syllables -= word.match(/[aeiouy]{2,}/g)?.length || 0; // Diphthongs + return Math.max(1, syllables); +} + +async function analyzeContent(page) { + const bodyText = await page.$eval("body", (el) => el.textContent); + const words = bodyText + .trim() + .split(/\s+/) + .filter((word) => word.length > 0); + + const paragraphs = await page.$$("p"); + const lists = await page.$$("ul, ol"); + return { - title: $('meta[property="og:title"]').attr("content") || "", - description: $('meta[property="og:description"]').attr("content") || "", - image: $('meta[property="og:image"]').attr("content") || "", - url: $('meta[property="og:url"]').attr("content") || "", + wordCount: words.length, + textLength: bodyText.length, + readability: calculateReadabilityScore(bodyText), + paragraphCount: paragraphs.length, + listCount: lists.length, }; } -function analyzeTwitterCards($) { - return { - card: $('meta[name="twitter:card"]').attr("content") || "", - title: $('meta[name="twitter:title"]').attr("content") || "", - description: $('meta[name="twitter:description"]').attr("content") || "", - image: $('meta[name="twitter:image"]').attr("content") || "", - }; -} - -function analyzeLanguage($) { - return $("html").attr("lang") || null; -} - -function analyzeDoctype($) { - const doctype = $("html")[0]?.prev?.data; - return doctype?.includes("
© {new Date().getFullYear()} SEO Analyzer Tool
+© {new Date().getFullYear()} RankRunners. All Rights Reserved