fix: puppeteer

This commit is contained in:
Val 2025-07-07 23:41:00 +07:00
parent e72a62ed0d
commit 5c3d55966e
2 changed files with 249 additions and 289 deletions

View File

@ -1,6 +1,6 @@
// app/api/seo-check/route.js // app/api/seo-check/route.js
import { NextResponse } from "next/server"; import { NextResponse } from "next/server";
import * as cheerio from "cheerio"; import puppeteer from "puppeteer";
export async function POST(request) { export async function POST(request) {
const startTime = Date.now(); const startTime = Date.now();
@ -33,246 +33,112 @@ export async function POST(request) {
); );
} }
// Fetch HTML with enhanced configuration // Launch Puppeteer browser
const controller = new AbortController(); let browser;
const timeout = setTimeout(() => controller.abort(), 15000);
let response;
try { try {
response = await fetch(url, { browser = await puppeteer.launch({
headers: { headless: true,
"User-Agent":
"Mozilla/5.0 (compatible; SEO-Analyzer/1.0; +https://github.com)",
Accept: "text/html,application/xhtml+xml",
},
redirect: "follow",
signal: controller.signal,
}); });
clearTimeout(timeout);
} catch (e) { } catch (e) {
return NextResponse.json( return NextResponse.json(
{ { error: "Failed to initialize browser" },
error:
e.name === "AbortError"
? "Request timed out"
: `Fetch failed: ${e.message}`,
},
{ status: 400 }
);
}
// Verify response
if (!response.ok) {
return NextResponse.json(
{
error: `HTTP ${response.status}`,
status: response.status,
url: response.url,
},
{ status: 400 }
);
}
const contentType = response.headers.get("content-type");
if (!contentType?.includes("text/html")) {
return NextResponse.json(
{ error: "URL does not return HTML content" },
{ status: 400 }
);
}
// Parse HTML
const html = await response.text();
const finalUrl = response.url;
let $;
try {
$ = cheerio.load(html);
} catch (e) {
return NextResponse.json(
{ error: "Failed to parse HTML content" },
{ status: 500 } { status: 500 }
); );
} }
// Extract security headers let page;
const securityHeaders = { let finalUrl;
https: finalUrl.startsWith("https://"), let html;
xFrameOptions: response.headers.get("x-frame-options"), let securityHeaders = {};
xXSSProtection: response.headers.get("x-xss-protection"), let response;
contentTypeOptions: response.headers.get("x-content-type-options"),
strictTransportSecurity: response.headers.get(
"strict-transport-security"
),
};
// Add this new function to analyze charset try {
function analyzeCharset($) { page = await browser.newPage();
const charsetMeta = $("meta[charset]");
if (charsetMeta.length > 0) {
return {
exists: true,
value: charsetMeta.attr("charset")?.toUpperCase() || "UTF-8",
declaredInMeta: true,
};
}
const httpEquiv = $('meta[http-equiv="Content-Type"]'); // Set user agent and headers
if (httpEquiv.length > 0) { await page.setUserAgent(
const content = httpEquiv.attr("content") || ""; "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
const charsetMatch = content.match(/charset=([^;]+)/i);
if (charsetMatch) {
return {
exists: true,
value: charsetMatch[1].toUpperCase(),
declaredInMeta: true,
};
}
}
return {
exists: false,
value: null,
declaredInMeta: false,
};
}
// Title Tag Analysis
/**
 * Summarise the document's <title> element.
 *
 * @param {Function} $ - Cheerio-style selector function for the loaded document.
 * @returns {{exists: boolean, text: string, length: number, status: string}}
 *   The trimmed text of the first <title>, its length, and a status that is
 *   "optimal" for 30-60 characters, "too_short" below 30 and "too_long" above 60.
 */
function analyzeTitle($) {
  const text = $("title").first().text().trim();
  const { length } = text;

  let status = "too_long";
  if (length < 30) {
    status = "too_short";
  } else if (length <= 60) {
    status = "optimal";
  }

  return { exists: length > 0, text, length, status };
}
// Meta Description Analysis
/**
 * Summarise the page's meta description.
 *
 * @param {Function} $ - Cheerio-style selector function for the loaded document.
 * @returns {{exists: boolean, text: string, length: number, status: string}}
 *   The `content` of `meta[name="description"]` (empty string when missing),
 *   its length, and a status that is "optimal" for 50-160 characters,
 *   "too_short" below 50 and "too_long" above 160.
 */
function analyzeMetaDescription($) {
  const text = $('meta[name="description"]').attr("content") || "";
  const size = text.length;

  let status;
  if (size < 50) {
    status = "too_short";
  } else if (size > 160) {
    status = "too_long";
  } else {
    status = "optimal";
  }

  return { exists: size > 0, text, length: size, status };
}
// Meta Robots Analysis
/**
 * Inspect the robots meta tag for indexing directives.
 *
 * Directive values are matched case-insensitively, so "NOINDEX" and
 * "NoFollow" are detected. The "none" directive is treated as shorthand for
 * both noindex and nofollow.
 *
 * @param {Function} $ - Cheerio-style selector function for the loaded document.
 * @returns {{exists: boolean, content: string, noindex: boolean, nofollow: boolean}}
 *   `content` is the raw attribute value (empty string when the tag is
 *   missing); the flags report whether indexing or link following is blocked.
 */
function analyzeMetaRobots($) {
  const content = $('meta[name="robots"]').attr("content") || "";
  const normalized = content.toLowerCase();

  // "none" must be matched as a whole directive so it is not confused with
  // an arbitrary substring of another value.
  const blocksEverything = normalized
    .split(/[\s,]+/)
    .some((directive) => directive === "none");

  return {
    exists: content.length > 0,
    content,
    noindex: blocksEverything || normalized.includes("noindex"),
    nofollow: blocksEverything || normalized.includes("nofollow"),
  };
}
// Viewport Analysis
/**
 * Report on the viewport meta tag.
 *
 * @param {Function} $ - Cheerio-style selector function for the loaded document.
 * @returns {{exists: boolean, content: string, mobileFriendly: boolean}}
 *   `content` is the tag's `content` attribute (empty string when missing);
 *   `mobileFriendly` is true when it contains "width=device-width".
 */
function analyzeViewport($) {
  const tag = $('meta[name="viewport"]');
  const content = tag.attr("content") || "";
  const exists = content.length > 0;
  const mobileFriendly = content.includes("width=device-width");

  return { exists, content, mobileFriendly };
}
// Text Analysis Functions
function calculateReadabilityScore(text) {
// Simple readability score calculation (Flesch-Kincaid approximation)
const words = text
.trim()
.split(/\s+/)
.filter((word) => word.length > 0);
const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 0);
const syllables = words.reduce(
(count, word) => count + countSyllables(word),
0
); );
if (words.length === 0 || sentences.length === 0) return 0; // Listen for response to capture headers
page.on("response", async (res) => {
if (res.url() === url || res.url() === finalUrl) {
response = res;
securityHeaders = {
https: res.url().startsWith("https://"),
xFrameOptions: res.headers()["x-frame-options"],
xXSSProtection: res.headers()["x-xss-protection"],
contentTypeOptions: res.headers()["x-content-type-options"],
strictTransportSecurity: res.headers()["strict-transport-security"],
};
}
});
const wordsPerSentence = words.length / sentences.length; // Navigate to the page with timeout
const syllablesPerWord = syllables / words.length; await page.goto(url, {
waitUntil: "networkidle2",
timeout: 15000,
});
// Flesch Reading Ease Score // Get final URL after potential redirects
const score = finalUrl = page.url();
206.835 - 1.015 * wordsPerSentence - 84.6 * syllablesPerWord;
// Normalize to 0-100 scale // Get the full HTML content
return Math.max(0, Math.min(100, Math.round(score))); html = await page.content();
} catch (e) {
await browser.close();
return NextResponse.json(
{
error:
e.name === "TimeoutError"
? "Page load timed out"
: `Failed to load page: ${e.message}`,
},
{ status: 400 }
);
} }
function countSyllables(word) { // Use Puppeteer's DOM methods for analysis
// Simple syllable counting approximation
word = word.toLowerCase().replace(/[^a-z]/g, "");
if (word.length <= 3) return 1;
let syllables = word.replace(/[^aeiouy]/g, "").length;
syllables -= word.match(/e$/) ? 1 : 0; // Silent e
syllables -= word.match(/[aeiouy]{2,}/g)?.length || 0; // Diphthongs
return Math.max(1, syllables);
}
// Content Analysis
/**
 * Collect basic statistics about the text content of <body>.
 *
 * @param {Function} $ - Cheerio-style selector function for the loaded document.
 * @returns {{wordCount: number, textLength: number, readability: number,
 *   paragraphCount: number, listCount: number}}
 *   Whitespace-separated word count, raw body text length, the score from
 *   calculateReadabilityScore, and the number of <p> and <ul>/<ol> elements.
 */
function analyzeContent($) {
  const text = $("body").text();

  // Count whitespace-separated tokens; splitting an empty string yields a
  // single empty piece, which must not be counted as a word.
  let wordCount = 0;
  for (const piece of text.trim().split(/\s+/)) {
    if (piece.length > 0) {
      wordCount += 1;
    }
  }

  const readability = calculateReadabilityScore(text);
  const paragraphCount = $("p").length;
  const listCount = $("ul, ol").length;

  return {
    wordCount,
    textLength: text.length,
    readability,
    paragraphCount,
    listCount,
  };
}
// Perform comprehensive analysis
const analysis = { const analysis = {
url: finalUrl, url: finalUrl,
pageLoadTime: (Date.now() - startTime) / 1000, pageLoadTime: (Date.now() - startTime) / 1000,
title: analyzeTitle($), title: await analyzeTitle(page),
meta: { meta: {
description: analyzeMetaDescription($), description: await analyzeMetaDescription(page),
robots: analyzeMetaRobots($), robots: await analyzeMetaRobots(page),
viewport: analyzeViewport($), viewport: await analyzeViewport(page),
charset: analyzeCharset($), charset: await analyzeCharset(page),
keywords: $('meta[name="keywords"]').attr("content") || null, keywords: await getMetaContent(page, "keywords"),
}, },
headings: analyzeHeadings($), headings: await analyzeHeadings(page),
images: analyzeImages($), images: await analyzeImages(page),
links: analyzeLinks($, finalUrl), links: await analyzeLinks(page, finalUrl),
content: analyzeContent($), content: await analyzeContent(page),
technical: { technical: {
canonical: analyzeCanonical($), canonical: await analyzeCanonical(page),
language: analyzeLanguage($), language: await analyzeLanguage(page),
schemaMarkup: analyzeSchemaMarkup($), schemaMarkup: await analyzeSchemaMarkup(page),
doctype: analyzeDoctype($), doctype: await analyzeDoctype(page),
}, },
social: { social: {
openGraph: analyzeOpenGraph($), openGraph: {
twitterCard: analyzeTwitterCards($), title: await getMetaContent(page, "og:title"),
description: await getMetaContent(page, "og:description"),
image: await getMetaContent(page, "og:image"),
url: await getMetaContent(page, "og:url"),
},
twitterCard: {
card: await getMetaContent(page, "twitter:card"),
title: await getMetaContent(page, "twitter:title"),
description: await getMetaContent(page, "twitter:description"),
image: await getMetaContent(page, "twitter:image"),
},
}, },
security: securityHeaders, security: securityHeaders,
analyzedAt: new Date().toISOString(), analyzedAt: new Date().toISOString(),
}; };
await browser.close();
return NextResponse.json(analysis); return NextResponse.json(analysis);
} catch (error) { } catch (error) {
console.error("SEO analysis error:", error); console.error("SEO analysis error:", error);
@ -287,9 +153,21 @@ export async function POST(request) {
} }
} }
// Analysis Functions // Helper function to get meta content
function analyzeTitle($) { async function getMetaContent(page, nameOrProperty) {
const title = $("title").first().text().trim(); try {
return await page.$eval(
`meta[name="${nameOrProperty}"], meta[property="${nameOrProperty}"]`,
(el) => (el ? el.getAttribute("content") : null)
);
} catch {
return null;
}
}
// Analysis functions using Puppeteer's DOM API
async function analyzeTitle(page) {
const title = await page.title();
return { return {
exists: title.length > 0, exists: title.length > 0,
text: title, text: title,
@ -303,8 +181,8 @@ function analyzeTitle($) {
}; };
} }
function analyzeMetaDescription($) { async function analyzeMetaDescription(page) {
const desc = $('meta[name="description"]').attr("content") || ""; const desc = (await getMetaContent(page, "description")) || "";
return { return {
exists: desc.length > 0, exists: desc.length > 0,
text: desc, text: desc,
@ -318,8 +196,8 @@ function analyzeMetaDescription($) {
}; };
} }
function analyzeMetaRobots($) { async function analyzeMetaRobots(page) {
const content = $('meta[name="robots"]').attr("content") || ""; const content = (await getMetaContent(page, "robots")) || "";
return { return {
exists: content.length > 0, exists: content.length > 0,
content, content,
@ -328,8 +206,8 @@ function analyzeMetaRobots($) {
}; };
} }
function analyzeViewport($) { async function analyzeViewport(page) {
const viewport = $('meta[name="viewport"]').attr("content") || ""; const viewport = (await getMetaContent(page, "viewport")) || "";
return { return {
exists: viewport.length > 0, exists: viewport.length > 0,
content: viewport, content: viewport,
@ -337,35 +215,79 @@ function analyzeViewport($) {
}; };
} }
function analyzeHeadings($) { async function analyzeCharset(page) {
try {
// Check meta charset
const charsetMeta = await page.$eval("meta[charset]", (el) =>
el.getAttribute("charset")
);
if (charsetMeta) {
return {
exists: true,
value: charsetMeta.toUpperCase(),
declaredInMeta: true,
};
}
// Check http-equiv
const httpEquiv = await page.$eval(
'meta[http-equiv="Content-Type"]',
(el) => el.getAttribute("content")
);
if (httpEquiv) {
const charsetMatch = httpEquiv.match(/charset=([^;]+)/i);
if (charsetMatch) {
return {
exists: true,
value: charsetMatch[1].toUpperCase(),
declaredInMeta: true,
};
}
}
return {
exists: false,
value: null,
declaredInMeta: false,
};
} catch {
return {
exists: false,
value: null,
declaredInMeta: false,
};
}
}
async function analyzeHeadings(page) {
const getHeadingTexts = async (selector) => {
return page.$$eval(selector, (els) =>
els.map((el) => el.textContent.trim())
);
};
return { return {
h1: { h1: {
count: $("h1").length, count: await page.$$eval("h1", (els) => els.length),
texts: $("h1") texts: await getHeadingTexts("h1"),
.map((i, el) => $(el).text().trim())
.get(),
}, },
h2: { h2: {
count: $("h2").length, count: await page.$$eval("h2", (els) => els.length),
texts: $("h2") texts: await getHeadingTexts("h2"),
.map((i, el) => $(el).text().trim())
.get(),
}, },
h3: { h3: {
count: $("h3").length, count: await page.$$eval("h3", (els) => els.length),
texts: $("h3") texts: await getHeadingTexts("h3"),
.map((i, el) => $(el).text().trim())
.get(),
}, },
}; };
} }
function analyzeImages($) { async function analyzeImages(page) {
const images = $("img"); const images = await page.$$("img");
const withAlt = images.filter((i, el) => { const withAlt = await page.$$eval(
const alt = $(el).attr("alt"); "img",
return alt && alt.trim() !== ""; (imgs) => imgs.filter((img) => img.alt && img.alt.trim() !== "").length
}).length; );
return { return {
total: images.length, total: images.length,
@ -376,8 +298,8 @@ function analyzeImages($) {
}; };
} }
function analyzeLinks($, baseUrl) { async function analyzeLinks(page, baseUrl) {
const links = $("a[href]"); const links = await page.$$("a[href]");
let internal = 0; let internal = 0;
let external = 0; let external = 0;
let nofollow = 0; let nofollow = 0;
@ -385,9 +307,9 @@ function analyzeLinks($, baseUrl) {
try { try {
const baseDomain = new URL(baseUrl).hostname.replace("www.", ""); const baseDomain = new URL(baseUrl).hostname.replace("www.", "");
links.each((i, el) => { for (const link of links) {
const href = $(el).attr("href"); const href = await link.evaluate((el) => el.getAttribute("href"));
const rel = $(el).attr("rel") || ""; const rel = await link.evaluate((el) => el.getAttribute("rel") || "");
if (rel.includes("nofollow")) nofollow++; if (rel.includes("nofollow")) nofollow++;
@ -401,7 +323,7 @@ function analyzeLinks($, baseUrl) {
} catch { } catch {
internal++; // Relative links internal++; // Relative links
} }
}); }
} catch (e) { } catch (e) {
console.error("Link analysis error:", e); console.error("Link analysis error:", e);
} }
@ -416,41 +338,36 @@ function analyzeLinks($, baseUrl) {
}; };
} }
function analyzeContent($) { async function analyzeCanonical(page) {
const bodyText = $("body").text(); const canonical =
const words = bodyText (await page
.trim() .$eval('link[rel="canonical"]', (el) =>
.split(/\s+/) el ? el.getAttribute("href") : null
.filter((word) => word.length > 0); )
.catch(() => null)) || "";
return { const ogUrl = (await getMetaContent(page, "og:url")) || "";
wordCount: words.length,
textLength: bodyText.length,
readability: calculateReadabilityScore(words), // Implement your own formula
};
}
function analyzeCanonical($) {
const canonical = $('link[rel="canonical"]').attr("href") || "";
return { return {
exists: canonical.length > 0, exists: canonical.length > 0,
url: canonical, url: canonical,
isSelf: canonical === $('meta[property="og:url"]').attr("content"), isSelf: canonical === ogUrl,
}; };
} }
function analyzeSchemaMarkup($) { async function analyzeSchemaMarkup(page) {
const schemas = $('script[type="application/ld+json"]'); const schemas = await page.$$('script[type="application/ld+json"]');
const types = []; const types = [];
schemas.each((i, el) => { for (const schema of schemas) {
try { try {
const json = JSON.parse($(el).text()); const jsonText = await schema.evaluate((el) => el.textContent);
const json = JSON.parse(jsonText);
if (json["@type"]) types.push(json["@type"]); if (json["@type"]) types.push(json["@type"]);
} catch (e) { } catch (e) {
console.error("Schema parsing error:", e); console.error("Schema parsing error:", e);
} }
}); }
return { return {
count: schemas.length, count: schemas.length,
@ -458,31 +375,74 @@ function analyzeSchemaMarkup($) {
}; };
} }
function analyzeOpenGraph($) { async function analyzeLanguage(page) {
return page.$eval("html", (el) => el.getAttribute("lang")).catch(() => null);
}
/**
 * Rebuild the document's doctype declaration as a string.
 *
 * The callback given to `page.evaluate` reads `document.doctype`, so it must
 * stay self-contained and not reference anything from this module's scope.
 *
 * @param {object} page - Object exposing `evaluate(fn)`; presumably a
 *   Puppeteer Page for the loaded document.
 * @returns {Promise<string|null>} A declaration such as `<!DOCTYPE html>`,
 *   including the public and system identifiers when present, or null when
 *   the document has no doctype.
 */
async function analyzeDoctype(page) {
  return page.evaluate(() => {
    const node = document.doctype;
    if (!node) {
      return null;
    }

    let declaration = `<!DOCTYPE ${node.name}`;
    if (node.publicId) {
      declaration += ` PUBLIC "${node.publicId}"`;
    }
    if (node.systemId) {
      declaration += ` "${node.systemId}"`;
    }
    return `${declaration}>`;
  });
}
/**
 * Estimate readability with the Flesch Reading Ease formula.
 *
 * Words are whitespace-separated tokens, sentences are the non-blank pieces
 * between runs of `.`, `!` or `?`, and syllables come from countSyllables.
 *
 * @param {string} text - Text to score.
 * @returns {number} Score rounded and clamped to 0-100; 0 when the text has
 *   no words or no sentences.
 */
function calculateReadabilityScore(text) {
  const words = text
    .trim()
    .split(/\s+/)
    .filter((token) => token.length > 0);
  const sentences = text
    .split(/[.!?]+/)
    .filter((chunk) => chunk.trim().length > 0);

  // Without words or sentences the ratios below would divide by zero.
  if (words.length === 0 || sentences.length === 0) {
    return 0;
  }

  let syllables = 0;
  for (const token of words) {
    syllables += countSyllables(token);
  }

  const wordsPerSentence = words.length / sentences.length;
  const syllablesPerWord = syllables / words.length;
  const score = 206.835 - 1.015 * wordsPerSentence - 84.6 * syllablesPerWord;

  const rounded = Math.round(score);
  return Math.max(0, Math.min(100, rounded));
}
/**
 * Approximate the number of syllables in an English word.
 *
 * Each run of consecutive vowels (a, e, i, o, u, y) counts as one syllable,
 * and a trailing "e" is treated as silent. Counting runs directly avoids
 * over-counting runs of three or more vowels (e.g. "beautiful").
 *
 * @param {string} word - Word to analyse; non-letters are ignored.
 * @returns {number} Estimated syllable count, never less than 1. Words of
 *   three letters or fewer always count as 1.
 */
function countSyllables(word) {
  const letters = word.toLowerCase().replace(/[^a-z]/g, "");
  if (letters.length <= 3) {
    return 1;
  }

  const vowelRuns = letters.match(/[aeiouy]+/g)?.length ?? 0;
  const silentE = letters.endsWith("e") ? 1 : 0;

  return Math.max(1, vowelRuns - silentE);
}
async function analyzeContent(page) {
const bodyText = await page.$eval("body", (el) => el.textContent);
const words = bodyText
.trim()
.split(/\s+/)
.filter((word) => word.length > 0);
const paragraphs = await page.$$("p");
const lists = await page.$$("ul, ol");
return { return {
title: $('meta[property="og:title"]').attr("content") || "", wordCount: words.length,
description: $('meta[property="og:description"]').attr("content") || "", textLength: bodyText.length,
image: $('meta[property="og:image"]').attr("content") || "", readability: calculateReadabilityScore(bodyText),
url: $('meta[property="og:url"]').attr("content") || "", paragraphCount: paragraphs.length,
listCount: lists.length,
}; };
} }
/**
 * Read the Twitter Card meta tags.
 *
 * @param {Function} $ - Cheerio-style selector function for the loaded document.
 * @returns {{card: string, title: string, description: string, image: string}}
 *   The `content` of each tag, or an empty string when it is missing.
 */
function analyzeTwitterCards($) {
  const selectors = {
    card: 'meta[name="twitter:card"]',
    title: 'meta[name="twitter:title"]',
    description: 'meta[name="twitter:description"]',
    image: 'meta[name="twitter:image"]',
  };

  return Object.fromEntries(
    Object.entries(selectors).map(([field, selector]) => [
      field,
      $(selector).attr("content") || "",
    ])
  );
}
/**
 * Read the `lang` attribute of the <html> element.
 *
 * @param {Function} $ - Cheerio-style selector function for the loaded document.
 * @returns {string|null} The attribute value, or null when it is missing or empty.
 */
function analyzeLanguage($) {
  const lang = $("html").attr("lang");
  if (!lang) {
    return null;
  }
  return lang;
}
/**
 * Return the doctype declaration that precedes the <html> element, if any.
 *
 * Reads the `data` of the node stored as `prev` on the first <html> match
 * and accepts it only when it contains "<!DOCTYPE".
 *
 * @param {Function} $ - Cheerio-style selector function for the loaded document.
 * @returns {string|null} The preceding node's data, or null when there is no
 *   such node or it is not a doctype declaration.
 */
function analyzeDoctype($) {
  const htmlNode = $("html")[0];
  const preceding = htmlNode?.prev?.data;

  if (preceding?.includes("<!DOCTYPE")) {
    return preceding;
  }
  return null;
}
export const dynamic = "force-dynamic"; export const dynamic = "force-dynamic";

View File

@ -20,7 +20,7 @@ export default function Home() {
<footer className="bg-white py-8 border-t"> <footer className="bg-white py-8 border-t">
<div className="container mx-auto px-4 text-center text-gray-500"> <div className="container mx-auto px-4 text-center text-gray-500">
<p>© {new Date().getFullYear()} SEO Analyzer Tool</p> <p>© {new Date().getFullYear()} RankRunners. All Rights Reserved</p>
</div> </div>
</footer> </footer>
</Layout> </Layout>