fix: puppeteer

This commit is contained in:
Val 2025-07-07 23:41:00 +07:00
parent e72a62ed0d
commit 5c3d55966e
2 changed files with 249 additions and 289 deletions

View File

@ -1,6 +1,6 @@
// app/api/seo-check/route.js
import { NextResponse } from "next/server";
import * as cheerio from "cheerio";
import puppeteer from "puppeteer";
export async function POST(request) {
const startTime = Date.now();
@ -33,93 +33,209 @@ export async function POST(request) {
);
}
// Fetch HTML with enhanced configuration
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), 15000);
let response;
// Launch Puppeteer browser
let browser;
try {
response = await fetch(url, {
headers: {
"User-Agent":
"Mozilla/5.0 (compatible; SEO-Analyzer/1.0; +https://github.com)",
Accept: "text/html,application/xhtml+xml",
},
redirect: "follow",
signal: controller.signal,
browser = await puppeteer.launch({
headless: true,
});
clearTimeout(timeout);
} catch (e) {
return NextResponse.json(
{
error:
e.name === "AbortError"
? "Request timed out"
: `Fetch failed: ${e.message}`,
},
{ status: 400 }
);
}
// Verify response
if (!response.ok) {
return NextResponse.json(
{
error: `HTTP ${response.status}`,
status: response.status,
url: response.url,
},
{ status: 400 }
);
}
const contentType = response.headers.get("content-type");
if (!contentType?.includes("text/html")) {
return NextResponse.json(
{ error: "URL does not return HTML content" },
{ status: 400 }
);
}
// Parse HTML
const html = await response.text();
const finalUrl = response.url;
let $;
try {
$ = cheerio.load(html);
} catch (e) {
return NextResponse.json(
{ error: "Failed to parse HTML content" },
{ error: "Failed to initialize browser" },
{ status: 500 }
);
}
// Extract security headers
const securityHeaders = {
https: finalUrl.startsWith("https://"),
xFrameOptions: response.headers.get("x-frame-options"),
xXSSProtection: response.headers.get("x-xss-protection"),
contentTypeOptions: response.headers.get("x-content-type-options"),
strictTransportSecurity: response.headers.get(
"strict-transport-security"
),
let page;
let finalUrl;
let html;
let securityHeaders = {};
let response;
try {
page = await browser.newPage();
// Set user agent and headers
await page.setUserAgent(
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
);
// Listen for response to capture headers
page.on("response", async (res) => {
if (res.url() === url || res.url() === finalUrl) {
response = res;
securityHeaders = {
https: res.url().startsWith("https://"),
xFrameOptions: res.headers()["x-frame-options"],
xXSSProtection: res.headers()["x-xss-protection"],
contentTypeOptions: res.headers()["x-content-type-options"],
strictTransportSecurity: res.headers()["strict-transport-security"],
};
}
});
// Navigate to the page with timeout
await page.goto(url, {
waitUntil: "networkidle2",
timeout: 15000,
});
// Get final URL after potential redirects
finalUrl = page.url();
// Get the full HTML content
html = await page.content();
} catch (e) {
await browser.close();
return NextResponse.json(
{
error:
e.name === "TimeoutError"
? "Page load timed out"
: `Failed to load page: ${e.message}`,
},
{ status: 400 }
);
}
// Use Puppeteer's DOM methods for analysis
const analysis = {
url: finalUrl,
pageLoadTime: (Date.now() - startTime) / 1000,
title: await analyzeTitle(page),
meta: {
description: await analyzeMetaDescription(page),
robots: await analyzeMetaRobots(page),
viewport: await analyzeViewport(page),
charset: await analyzeCharset(page),
keywords: await getMetaContent(page, "keywords"),
},
headings: await analyzeHeadings(page),
images: await analyzeImages(page),
links: await analyzeLinks(page, finalUrl),
content: await analyzeContent(page),
technical: {
canonical: await analyzeCanonical(page),
language: await analyzeLanguage(page),
schemaMarkup: await analyzeSchemaMarkup(page),
doctype: await analyzeDoctype(page),
},
social: {
openGraph: {
title: await getMetaContent(page, "og:title"),
description: await getMetaContent(page, "og:description"),
image: await getMetaContent(page, "og:image"),
url: await getMetaContent(page, "og:url"),
},
twitterCard: {
card: await getMetaContent(page, "twitter:card"),
title: await getMetaContent(page, "twitter:title"),
description: await getMetaContent(page, "twitter:description"),
image: await getMetaContent(page, "twitter:image"),
},
},
security: securityHeaders,
analyzedAt: new Date().toISOString(),
};
// Add this new function to analyze charset
function analyzeCharset($) {
const charsetMeta = $("meta[charset]");
if (charsetMeta.length > 0) {
await browser.close();
return NextResponse.json(analysis);
} catch (error) {
console.error("SEO analysis error:", error);
return NextResponse.json(
{
error: "Internal server error during analysis",
details:
process.env.NODE_ENV === "development" ? error.stack : undefined,
},
{ status: 500 }
);
}
}
/**
 * Reads the `content` attribute of the <meta> element matched by either
 * `name="<key>"` or `property="<key>"`.
 *
 * @param {object} page - Object exposing `$eval(selector, fn)` (a Puppeteer page in this file).
 * @param {string} nameOrProperty - Value compared against the `name` / `property` attribute.
 * @returns {Promise<string|null>} The attribute value, or null when `$eval` throws.
 */
async function getMetaContent(page, nameOrProperty) {
  const selector = `meta[name="${nameOrProperty}"], meta[property="${nameOrProperty}"]`;

  let content;
  try {
    content = await page.$eval(selector, (node) => {
      if (!node) {
        return null;
      }
      return node.getAttribute("content");
    });
  } catch {
    // Any failure of the lookup is reported to the caller as "no value".
    return null;
  }
  return content;
}
// Analysis functions using Puppeteer's DOM API
/**
 * Grades the document title by length.
 *
 * @param {object} page - Object exposing an async `title()` method.
 * @returns {Promise<{exists: boolean, text: string, length: number, status: string}>}
 *   `status` is "optimal" for 30-60 characters (inclusive), "too_short" below
 *   30 and "too_long" above 60.
 */
async function analyzeTitle(page) {
  const text = await page.title();
  const size = text.length;

  let status = "optimal";
  if (size < 30) {
    status = "too_short";
  } else if (size > 60) {
    status = "too_long";
  }

  return { exists: size > 0, text, length: size, status };
}
/**
 * Grades the meta description by length.
 *
 * @param {object} page - Passed through to `getMetaContent`.
 * @returns {Promise<{exists: boolean, text: string, length: number, status: string}>}
 *   `status` is "optimal" for 50-160 characters (inclusive), "too_short"
 *   below 50 and "too_long" above 160. A missing description is treated as "".
 */
async function analyzeMetaDescription(page) {
  const found = await getMetaContent(page, "description");
  const text = found || "";
  const size = text.length;

  let status = "optimal";
  if (size < 50) {
    status = "too_short";
  } else if (size > 160) {
    status = "too_long";
  }

  return { exists: size > 0, text, length: size, status };
}
/**
 * Summarises the robots meta directive.
 *
 * @param {object} page - Passed through to `getMetaContent`.
 * @returns {Promise<{exists: boolean, content: string, noindex: boolean, nofollow: boolean}>}
 *   `content` is the raw attribute value ("" when absent); the two flags
 *   report whether the corresponding directive appears in it.
 */
async function analyzeMetaRobots(page) {
  const content = (await getMetaContent(page, "robots")) || "";

  // Robots directives are case-insensitive, so match against a lowercased
  // copy while still reporting the author's original spelling in `content`.
  const directives = content.toLowerCase();

  return {
    exists: content.length > 0,
    content,
    noindex: directives.includes("noindex"),
    nofollow: directives.includes("nofollow"),
  };
}
/**
 * Reports the viewport meta tag and whether it declares `width=device-width`.
 *
 * @param {object} page - Passed through to `getMetaContent`.
 * @returns {Promise<{exists: boolean, content: string, mobileFriendly: boolean}>}
 *   `content` is "" when the tag is absent.
 */
async function analyzeViewport(page) {
  const found = await getMetaContent(page, "viewport");
  const content = found || "";
  const exists = content.length > 0;
  const mobileFriendly = content.includes("width=device-width");
  return { exists, content, mobileFriendly };
}
async function analyzeCharset(page) {
try {
// Check meta charset
const charsetMeta = await page.$eval("meta[charset]", (el) =>
el.getAttribute("charset")
);
if (charsetMeta) {
return {
exists: true,
value: charsetMeta.attr("charset")?.toUpperCase() || "UTF-8",
value: charsetMeta.toUpperCase(),
declaredInMeta: true,
};
}
const httpEquiv = $('meta[http-equiv="Content-Type"]');
if (httpEquiv.length > 0) {
const content = httpEquiv.attr("content") || "";
const charsetMatch = content.match(/charset=([^;]+)/i);
// Check http-equiv
const httpEquiv = await page.$eval(
'meta[http-equiv="Content-Type"]',
(el) => el.getAttribute("content")
);
if (httpEquiv) {
const charsetMatch = httpEquiv.match(/charset=([^;]+)/i);
if (charsetMatch) {
return {
exists: true,
@ -134,63 +250,148 @@ export async function POST(request) {
value: null,
declaredInMeta: false,
};
}
// Title Tag Analysis
function analyzeTitle($) {
const title = $("title").first().text().trim();
} catch {
return {
exists: title.length > 0,
text: title,
length: title.length,
status:
title.length >= 30 && title.length <= 60
? "optimal"
: title.length < 30
? "too_short"
: "too_long",
exists: false,
value: null,
declaredInMeta: false,
};
}
}
// Meta Description Analysis
function analyzeMetaDescription($) {
const desc = $('meta[name="description"]').attr("content") || "";
return {
exists: desc.length > 0,
text: desc,
length: desc.length,
status:
desc.length >= 50 && desc.length <= 160
? "optimal"
: desc.length < 50
? "too_short"
: "too_long",
/**
 * Collects the trimmed text of every h1, h2 and h3 element.
 *
 * Each heading level is read with a single `$$eval` call and the count is
 * derived from the returned texts, so the two values can never disagree and
 * the page is queried three times instead of six.
 *
 * @param {object} page - Object exposing `$$eval(selector, fn)`.
 * @returns {Promise<{h1: {count: number, texts: string[]}, h2: {count: number, texts: string[]}, h3: {count: number, texts: string[]}}>}
 */
async function analyzeHeadings(page) {
  const summarize = async (selector) => {
    const texts = await page.$$eval(selector, (els) =>
      els.map((el) => el.textContent.trim())
    );
    return { count: texts.length, texts };
  };

  // The three levels are independent, so query them concurrently.
  const [h1, h2, h3] = await Promise.all([
    summarize("h1"),
    summarize("h2"),
    summarize("h3"),
  ]);

  return { h1, h2, h3 };
}
/**
 * Counts <img> elements and how many carry non-blank alt text.
 *
 * Both numbers come from one `$$eval` call, so no element handles are
 * created and the totals are taken from the same DOM snapshot.
 *
 * @param {object} page - Object exposing `$$eval(selector, fn)`.
 * @returns {Promise<{total: number, withAlt: number, withoutAlt: number, percentageWithAlt: number}>}
 *   `percentageWithAlt` is rounded to an integer and is 100 when the page
 *   has no images.
 */
async function analyzeImages(page) {
  const { total, withAlt } = await page.$$eval("img", (imgs) => ({
    total: imgs.length,
    withAlt: imgs.filter((img) => img.alt && img.alt.trim() !== "").length,
  }));

  return {
    total,
    withAlt,
    withoutAlt: total - withAlt,
    percentageWithAlt: total > 0 ? Math.round((withAlt / total) * 100) : 100,
  };
}
/**
 * Classifies every `a[href]` on the page as internal or external relative to
 * `baseUrl` and counts links whose `rel` contains "nofollow".
 *
 * All href/rel pairs are read in a single `$$eval` call instead of two
 * browser round trips per link.
 *
 * @param {object} page - Object exposing `$$eval(selector, fn)`.
 * @param {string} baseUrl - URL that relative hrefs are resolved against and
 *   whose hostname defines "internal".
 * @returns {Promise<{total: number, internal: number, external: number, nofollow: number, nofollowPercentage: number}>}
 *   If `baseUrl` cannot be parsed the error is logged and the counters stay
 *   at 0 while `total` still reflects the number of links.
 */
async function analyzeLinks(page, baseUrl) {
  const links = await page.$$eval("a[href]", (els) =>
    els.map((el) => ({
      href: el.getAttribute("href"),
      rel: el.getAttribute("rel") || "",
    }))
  );

  // Only a leading "www." label is stripped; an unanchored replace would also
  // rewrite hostnames such as "foowww.bar.com".
  const stripWww = (hostname) => hostname.replace(/^www\./, "");

  let internal = 0;
  let external = 0;
  let nofollow = 0;

  try {
    const baseDomain = stripWww(new URL(baseUrl).hostname);

    for (const { href, rel } of links) {
      if (rel.includes("nofollow")) nofollow++;

      try {
        const target = new URL(href, baseUrl);
        if (stripWww(target.hostname) === baseDomain) {
          internal++;
        } else {
          external++;
        }
      } catch {
        // href could not be resolved against baseUrl; counted as internal.
        internal++;
      }
    }
  } catch (e) {
    console.error("Link analysis error:", e);
  }

  return {
    total: links.length,
    internal,
    external,
    nofollow,
    nofollowPercentage:
      links.length > 0 ? Math.round((nofollow / links.length) * 100) : 0,
  };
}
/**
 * Reports the canonical link and whether it matches the page's og:url.
 *
 * @param {object} page - Object exposing `$eval(selector, fn)`; also passed
 *   to `getMetaContent`.
 * @returns {Promise<{exists: boolean, url: string, isSelf: boolean}>}
 *   `url` is "" when no canonical link is found. `isSelf` is true only when
 *   a canonical URL exists and equals the og:url value.
 */
async function analyzeCanonical(page) {
  let canonical = "";
  try {
    canonical =
      (await page.$eval('link[rel="canonical"]', (el) =>
        el.getAttribute("href")
      )) || "";
  } catch {
    // The lookup failing (e.g. no canonical link) is an expected outcome.
    canonical = "";
  }

  const ogUrl = (await getMetaContent(page, "og:url")) || "";
  const exists = canonical.length > 0;

  return {
    exists,
    url: canonical,
    // Without the `exists` guard two missing tags would compare "" === ""
    // and be reported as a self-referencing canonical.
    isSelf: exists && canonical === ogUrl,
  };
}
/**
 * Lists the schema types declared in the page's JSON-LD scripts.
 *
 * Script texts are read in one `$$eval` call. Besides a single top-level
 * object, the parser also walks top-level arrays, `@graph` containers and
 * array-valued `@type`, so every declared type is reported as a string.
 *
 * @param {object} page - Object exposing `$$eval(selector, fn)`.
 * @returns {Promise<{count: number, types: string[]}>} `count` is the number
 *   of JSON-LD script elements (including unparsable ones); `types` holds
 *   each distinct type once, in first-seen order.
 */
async function analyzeSchemaMarkup(page) {
  const scripts = await page.$$eval(
    'script[type="application/ld+json"]',
    (els) => els.map((el) => el.textContent)
  );

  const types = new Set();

  const collect = (node) => {
    if (Array.isArray(node)) {
      node.forEach(collect);
      return;
    }
    if (node === null || typeof node !== "object") return;

    const declared = node["@type"];
    const names = Array.isArray(declared) ? declared : [declared];
    for (const name of names) {
      if (typeof name === "string" && name.length > 0) types.add(name);
    }

    if (node["@graph"] !== undefined) collect(node["@graph"]);
  };

  for (const text of scripts) {
    try {
      collect(JSON.parse(text));
    } catch (e) {
      // A malformed script must not abort the whole analysis.
      console.error("Schema parsing error:", e);
    }
  }

  return {
    count: scripts.length,
    types: [...types],
  };
}
}
// Text Analysis Functions
function calculateReadabilityScore(text) {
/**
 * Reads the `lang` attribute of the <html> element.
 *
 * @param {object} page - Object exposing `$eval(selector, fn)`.
 * @returns {Promise<string|null>} The attribute value as returned by the
 *   lookup, or null when the lookup rejects.
 */
async function analyzeLanguage(page) {
  const readLang = (root) => root.getAttribute("lang");
  const pending = page.$eval("html", readLang);
  return pending.catch(() => null);
}
/**
 * Rebuilds the document's doctype declaration as a string.
 *
 * The callback is handed to `page.evaluate` and reads `document.doctype`.
 *
 * @param {object} page - Object exposing `evaluate(fn)`.
 * @returns {Promise<string|null>} e.g. `<!DOCTYPE html>`, with the PUBLIC and
 *   system identifiers appended when they are non-empty; null when the
 *   document has no doctype.
 */
async function analyzeDoctype(page) {
  return page.evaluate(() => {
    const dt = document.doctype;
    if (!dt) {
      return null;
    }

    const parts = [`<!DOCTYPE ${dt.name}`];
    if (dt.publicId) {
      parts.push(` PUBLIC "${dt.publicId}"`);
    }
    if (dt.systemId) {
      parts.push(` "${dt.systemId}"`);
    }
    parts.push(">");
    return parts.join("");
  });
}
function calculateReadabilityScore(text) {
// Simple readability score calculation (Flesch-Kincaid approximation)
const words = text
.trim()
@ -208,14 +409,13 @@ export async function POST(request) {
const syllablesPerWord = syllables / words.length;
// Flesch Reading Ease Score
const score =
206.835 - 1.015 * wordsPerSentence - 84.6 * syllablesPerWord;
const score = 206.835 - 1.015 * wordsPerSentence - 84.6 * syllablesPerWord;
// Normalize to 0-100 scale
return Math.max(0, Math.min(100, Math.round(score)));
}
}
function countSyllables(word) {
function countSyllables(word) {
// Simple syllable counting approximation
word = word.toLowerCase().replace(/[^a-z]/g, "");
if (word.length <= 3) return 1;
@ -224,265 +424,25 @@ export async function POST(request) {
syllables -= word.match(/e$/) ? 1 : 0; // Silent e
syllables -= word.match(/[aeiouy]{2,}/g)?.length || 0; // Diphthongs
return Math.max(1, syllables);
}
}
/**
 * Measures the body text and counts paragraphs and lists.
 *
 * Paragraph and list counts are computed inside the page with `$$eval`
 * rather than by fetching element handles that are only counted.
 *
 * @param {object} page - Object exposing `$eval(selector, fn)` and
 *   `$$eval(selector, fn)`.
 * @returns {Promise<{wordCount: number, textLength: number, readability: *, paragraphCount: number, listCount: number}>}
 *   `wordCount` is the number of whitespace-separated tokens in the body's
 *   `textContent`; `readability` is whatever `calculateReadabilityScore`
 *   returns for that text.
 */
async function analyzeContent(page) {
  // The three reads are independent, so issue them concurrently.
  const [bodyText, paragraphCount, listCount] = await Promise.all([
    page.$eval("body", (el) => el.textContent),
    page.$$eval("p", (els) => els.length),
    page.$$eval("ul, ol", (els) => els.length),
  ]);

  const words = bodyText
    .trim()
    .split(/\s+/)
    .filter((word) => word.length > 0);

  return {
    wordCount: words.length,
    textLength: bodyText.length,
    readability: calculateReadabilityScore(bodyText),
    paragraphCount,
    listCount,
  };
}
// Perform comprehensive analysis
const analysis = {
url: finalUrl,
pageLoadTime: (Date.now() - startTime) / 1000,
title: analyzeTitle($),
meta: {
description: analyzeMetaDescription($),
robots: analyzeMetaRobots($),
viewport: analyzeViewport($),
charset: analyzeCharset($),
keywords: $('meta[name="keywords"]').attr("content") || null,
},
headings: analyzeHeadings($),
images: analyzeImages($),
links: analyzeLinks($, finalUrl),
content: analyzeContent($),
technical: {
canonical: analyzeCanonical($),
language: analyzeLanguage($),
schemaMarkup: analyzeSchemaMarkup($),
doctype: analyzeDoctype($),
},
social: {
openGraph: analyzeOpenGraph($),
twitterCard: analyzeTwitterCards($),
},
security: securityHeaders,
analyzedAt: new Date().toISOString(),
};
return NextResponse.json(analysis);
} catch (error) {
console.error("SEO analysis error:", error);
return NextResponse.json(
{
error: "Internal server error during analysis",
details:
process.env.NODE_ENV === "development" ? error.stack : undefined,
},
{ status: 500 }
);
}
}
// Analysis Functions
/**
 * Grades the text of the first <title> element by length.
 *
 * @param {Function} $ - Selector function (cheerio-style) supporting
 *   `$(sel).first().text()`.
 * @returns {{exists: boolean, text: string, length: number, status: string}}
 *   `text` is trimmed; `status` is "optimal" for 30-60 characters
 *   (inclusive), "too_short" below 30 and "too_long" above 60.
 */
function analyzeTitle($) {
  const text = $("title").first().text().trim();
  const size = text.length;

  let status = "optimal";
  if (size < 30) {
    status = "too_short";
  } else if (size > 60) {
    status = "too_long";
  }

  return { exists: size > 0, text, length: size, status };
}
/**
 * Grades the meta description by length.
 *
 * @param {Function} $ - Selector function supporting `$(sel).attr(name)`.
 * @returns {{exists: boolean, text: string, length: number, status: string}}
 *   `status` is "optimal" for 50-160 characters (inclusive), "too_short"
 *   below 50 and "too_long" above 160. A missing description is treated as "".
 */
function analyzeMetaDescription($) {
  const text = $('meta[name="description"]').attr("content") || "";
  const size = text.length;

  let status = "optimal";
  if (size < 50) {
    status = "too_short";
  } else if (size > 160) {
    status = "too_long";
  }

  return { exists: size > 0, text, length: size, status };
}
/**
 * Summarises the robots meta directive.
 *
 * @param {Function} $ - Selector function supporting `$(sel).attr(name)`.
 * @returns {{exists: boolean, content: string, noindex: boolean, nofollow: boolean}}
 *   `content` is the raw attribute value ("" when absent); the two flags
 *   report whether the corresponding directive appears in it.
 */
function analyzeMetaRobots($) {
  const content = $('meta[name="robots"]').attr("content") || "";

  // Robots directives are case-insensitive, so match against a lowercased
  // copy while still reporting the author's original spelling in `content`.
  const directives = content.toLowerCase();

  return {
    exists: content.length > 0,
    content,
    noindex: directives.includes("noindex"),
    nofollow: directives.includes("nofollow"),
  };
}
/**
 * Reports the viewport meta tag and whether it declares `width=device-width`.
 *
 * @param {Function} $ - Selector function supporting `$(sel).attr(name)`.
 * @returns {{exists: boolean, content: string, mobileFriendly: boolean}}
 *   `content` is "" when the tag is absent.
 */
function analyzeViewport($) {
  const content = $('meta[name="viewport"]').attr("content") || "";
  const exists = content.length > 0;
  const mobileFriendly = content.includes("width=device-width");
  return { exists, content, mobileFriendly };
}
/**
 * Collects the trimmed text of every h1, h2 and h3 element.
 *
 * @param {Function} $ - Selector function (cheerio-style): `$(selector)`
 *   yields a collection with `length` and `map(fn).get()`, and `$(element)`
 *   yields a wrapper with `text()`.
 * @returns {{h1: {count: number, texts: string[]}, h2: {count: number, texts: string[]}, h3: {count: number, texts: string[]}}}
 */
function analyzeHeadings($) {
  const summarize = (tag) => {
    const count = $(tag).length;
    const texts = $(tag)
      .map((index, node) => $(node).text().trim())
      .get();
    return { count, texts };
  };

  const h1 = summarize("h1");
  const h2 = summarize("h2");
  const h3 = summarize("h3");
  return { h1, h2, h3 };
}
/**
 * Counts <img> elements and how many carry non-blank alt text.
 *
 * @param {Function} $ - Selector function (cheerio-style): `$("img")` yields
 *   a collection with `length` and `filter(fn)`, and `$(element)` yields a
 *   wrapper with `attr(name)`.
 * @returns {{total: number, withAlt: number, withoutAlt: number, percentageWithAlt: number}}
 *   `percentageWithAlt` is rounded to an integer and is 100 when there are
 *   no images.
 */
function analyzeImages($) {
  const images = $("img");
  const total = images.length;

  const hasAltText = (index, node) => {
    const alt = $(node).attr("alt");
    return alt && alt.trim() !== "";
  };
  const withAlt = images.filter(hasAltText).length;

  let percentageWithAlt = 100;
  if (total > 0) {
    percentageWithAlt = Math.round((withAlt / total) * 100);
  }

  return { total, withAlt, withoutAlt: total - withAlt, percentageWithAlt };
}
/**
 * Classifies every `a[href]` as internal or external relative to `baseUrl`
 * and counts links whose `rel` contains "nofollow".
 *
 * @param {Function} $ - Selector function (cheerio-style): `$("a[href]")`
 *   yields a collection with `length` and `each(fn)`, and `$(element)` yields
 *   a wrapper with `attr(name)`.
 * @param {string} baseUrl - URL that relative hrefs are resolved against and
 *   whose hostname defines "internal".
 * @returns {{total: number, internal: number, external: number, nofollow: number, nofollowPercentage: number}}
 *   If `baseUrl` cannot be parsed the error is logged and the counters stay
 *   at 0 while `total` still reflects the number of links.
 */
function analyzeLinks($, baseUrl) {
  const links = $("a[href]");

  // Only a leading "www." label is stripped; an unanchored replace would also
  // rewrite hostnames such as "foowww.bar.com".
  const stripWww = (hostname) => hostname.replace(/^www\./, "");

  let internal = 0;
  let external = 0;
  let nofollow = 0;

  try {
    const baseDomain = stripWww(new URL(baseUrl).hostname);

    links.each((i, el) => {
      const href = $(el).attr("href");
      const rel = $(el).attr("rel") || "";

      if (rel.includes("nofollow")) nofollow++;

      try {
        const target = new URL(href, baseUrl);
        if (stripWww(target.hostname) === baseDomain) {
          internal++;
        } else {
          external++;
        }
      } catch {
        // href could not be resolved against baseUrl; counted as internal.
        internal++;
      }
    });
  } catch (e) {
    console.error("Link analysis error:", e);
  }

  return {
    total: links.length,
    internal,
    external,
    nofollow,
    nofollowPercentage:
      links.length > 0 ? Math.round((nofollow / links.length) * 100) : 0,
  };
}
/**
 * Measures the body text.
 *
 * @param {Function} $ - Selector function supporting `$("body").text()`.
 * @returns {{wordCount: number, textLength: number, readability: *}}
 *   `wordCount` is the number of whitespace-separated tokens; `readability`
 *   is whatever `calculateReadabilityScore` returns for the body text.
 */
function analyzeContent($) {
  const bodyText = $("body").text();
  const words = bodyText
    .trim()
    .split(/\s+/)
    .filter((word) => word.length > 0);

  return {
    wordCount: words.length,
    textLength: bodyText.length,
    // The scorer takes the text itself (it calls `.trim()` on its argument),
    // so it must receive the string rather than the token array.
    readability: calculateReadabilityScore(bodyText),
  };
}
/**
 * Reports the canonical link and whether it equals the og:url meta value.
 *
 * @param {Function} $ - Selector function supporting `$(sel).attr(name)`.
 * @returns {{exists: boolean, url: string, isSelf: boolean}} `url` is ""
 *   when no canonical href is found; `isSelf` is a strict comparison of
 *   `url` with the raw og:url attribute value.
 */
function analyzeCanonical($) {
  const url = $('link[rel="canonical"]').attr("href") || "";
  const ogUrl = $('meta[property="og:url"]').attr("content");
  const exists = url.length > 0;
  const isSelf = url === ogUrl;
  return { exists, url, isSelf };
}
/**
 * Lists the top-level `@type` of each JSON-LD script.
 *
 * @param {Function} $ - Selector function (cheerio-style): `$(selector)`
 *   yields a collection with `length` and `each(fn)`, and `$(element)` yields
 *   a wrapper with `text()`.
 * @returns {{count: number, types: Array}} `count` is the number of JSON-LD
 *   scripts; `types` holds each distinct truthy `@type` once, in first-seen
 *   order. Scripts that fail to parse are logged and skipped.
 */
function analyzeSchemaMarkup($) {
  const scripts = $('script[type="application/ld+json"]');
  const seen = new Set();

  scripts.each((index, node) => {
    try {
      const declared = JSON.parse($(node).text())["@type"];
      if (declared) {
        seen.add(declared);
      }
    } catch (e) {
      console.error("Schema parsing error:", e);
    }
  });

  return {
    count: scripts.length,
    types: Array.from(seen),
  };
}
/**
 * Reads the Open Graph title, description, image and url meta tags.
 *
 * @param {Function} $ - Selector function supporting `$(sel).attr(name)`.
 * @returns {{title: string, description: string, image: string, url: string}}
 *   Each field is "" when the tag is missing or empty.
 */
function analyzeOpenGraph($) {
  const read = (property) =>
    $(`meta[property="og:${property}"]`).attr("content") || "";

  const title = read("title");
  const description = read("description");
  const image = read("image");
  const url = read("url");
  return { title, description, image, url };
}
/**
 * Reads the Twitter card, title, description and image meta tags.
 *
 * @param {Function} $ - Selector function supporting `$(sel).attr(name)`.
 * @returns {{card: string, title: string, description: string, image: string}}
 *   Each field is "" when the tag is missing or empty.
 */
function analyzeTwitterCards($) {
  const fields = ["card", "title", "description", "image"];
  const result = {};
  for (const field of fields) {
    const value = $(`meta[name="twitter:${field}"]`).attr("content");
    result[field] = value || "";
  }
  return result;
}
/**
 * Reads the `lang` attribute of the <html> element.
 *
 * @param {Function} $ - Selector function supporting `$(sel).attr(name)`.
 * @returns {string|null} The attribute value, or null when it is missing or
 *   empty.
 */
function analyzeLanguage($) {
  const lang = $("html").attr("lang");
  if (lang) {
    return lang;
  }
  return null;
}
/**
 * Returns the doctype text stored on the node preceding <html>, if any.
 *
 * @param {Function} $ - Selector function; `$("html")[0]` is expected to be
 *   a node whose `prev.data` may hold the doctype text.
 * @returns {string|null} That `data` string when it contains "<!DOCTYPE",
 *   otherwise null.
 */
function analyzeDoctype($) {
  const root = $("html")[0];
  const preceding = root?.prev?.data;
  if (preceding?.includes("<!DOCTYPE")) {
    return preceding;
  }
  return null;
}
export const dynamic = "force-dynamic";

View File

@ -20,7 +20,7 @@ export default function Home() {
<footer className="bg-white py-8 border-t">
<div className="container mx-auto px-4 text-center text-gray-500">
<p>© {new Date().getFullYear()} SEO Analyzer Tool</p>
<p>© {new Date().getFullYear()} RankRunners. All Rights Reserved</p>
</div>
</footer>
</Layout>