- Introduced a new crawling management feature allowing users to configure, execute, and log web crawls.
- Added CRUD operations for crawl configurations, including URL analysis and preview capabilities.
- Implemented a new service for handling crawling logic and scheduling tasks.
- Integrated cheerio for HTML parsing and axios for HTTP requests.
- Created a sample HTML page for testing crawling functionality.

This commit enhances the application's data collection capabilities from external websites.
125 lines
4.8 KiB
TypeScript
125 lines
4.8 KiB
TypeScript
import { Request, Response } from "express";
|
|
import { CrawlService } from "../services/crawlService";
|
|
import { logger } from "../utils/logger";
|
|
|
|
// Express Request extended with the user object attached by upstream auth
// middleware. Optional because requests may reach these handlers without
// authentication — NOTE(review): confirm the middleware ordering guarantees.
interface AuthenticatedRequest extends Request {
  // companyCode scopes data access; userId is recorded as the writer on create.
  user?: { companyCode: string; userId: string };
}
|
|
|
|
// 설정 목록 조회
|
|
export async function getCrawlConfigs(req: AuthenticatedRequest, res: Response) {
|
|
try {
|
|
const companyCode = req.user?.companyCode || "*";
|
|
const configs = await CrawlService.getConfigs(companyCode);
|
|
return res.json({ success: true, data: configs });
|
|
} catch (error: any) {
|
|
logger.error("크롤링 설정 조회 실패:", error);
|
|
return res.status(500).json({ success: false, message: error.message });
|
|
}
|
|
}
|
|
|
|
// 설정 상세 조회
|
|
export async function getCrawlConfig(req: AuthenticatedRequest, res: Response) {
|
|
try {
|
|
const config = await CrawlService.getConfigById(req.params.id);
|
|
if (!config) return res.status(404).json({ success: false, message: "설정을 찾을 수 없습니다." });
|
|
return res.json({ success: true, data: config });
|
|
} catch (error: any) {
|
|
logger.error("크롤링 설정 상세 조회 실패:", error);
|
|
return res.status(500).json({ success: false, message: error.message });
|
|
}
|
|
}
|
|
|
|
// 설정 생성
|
|
export async function createCrawlConfig(req: AuthenticatedRequest, res: Response) {
|
|
try {
|
|
const data = {
|
|
...req.body,
|
|
company_code: req.user?.companyCode || req.body.company_code,
|
|
writer: req.user?.userId,
|
|
};
|
|
const config = await CrawlService.createConfig(data);
|
|
return res.json({ success: true, data: config });
|
|
} catch (error: any) {
|
|
logger.error("크롤링 설정 생성 실패:", error);
|
|
return res.status(500).json({ success: false, message: error.message });
|
|
}
|
|
}
|
|
|
|
// 설정 수정
|
|
export async function updateCrawlConfig(req: AuthenticatedRequest, res: Response) {
|
|
try {
|
|
const config = await CrawlService.updateConfig(req.params.id, req.body);
|
|
if (!config) return res.status(404).json({ success: false, message: "설정을 찾을 수 없습니다." });
|
|
return res.json({ success: true, data: config });
|
|
} catch (error: any) {
|
|
logger.error("크롤링 설정 수정 실패:", error);
|
|
return res.status(500).json({ success: false, message: error.message });
|
|
}
|
|
}
|
|
|
|
// 설정 삭제
|
|
export async function deleteCrawlConfig(req: AuthenticatedRequest, res: Response) {
|
|
try {
|
|
await CrawlService.deleteConfig(req.params.id);
|
|
return res.json({ success: true });
|
|
} catch (error: any) {
|
|
logger.error("크롤링 설정 삭제 실패:", error);
|
|
return res.status(500).json({ success: false, message: error.message });
|
|
}
|
|
}
|
|
|
|
// 미리보기
|
|
export async function previewCrawl(req: AuthenticatedRequest, res: Response) {
|
|
try {
|
|
const { url, row_selector, column_mappings, method, headers, request_body } = req.body;
|
|
if (!url) return res.status(400).json({ success: false, message: "URL은 필수입니다." });
|
|
|
|
const result = await CrawlService.preview(url, row_selector, column_mappings || [], method, headers, request_body);
|
|
return res.json({ success: true, data: result });
|
|
} catch (error: any) {
|
|
logger.error("크롤링 미리보기 실패:", error);
|
|
return res.status(500).json({ success: false, message: error.message });
|
|
}
|
|
}
|
|
|
|
// URL 자동 분석 — 페이지의 테이블/리스트 구조를 감지
|
|
export async function analyzeUrl(req: AuthenticatedRequest, res: Response) {
|
|
try {
|
|
const { url } = req.body;
|
|
if (!url) return res.status(400).json({ success: false, message: "URL은 필수입니다." });
|
|
|
|
const result = await CrawlService.analyzeUrl(url);
|
|
return res.json({ success: true, data: result });
|
|
} catch (error: any) {
|
|
logger.error("URL 분석 실패:", error);
|
|
return res.status(500).json({ success: false, message: error.message });
|
|
}
|
|
}
|
|
|
|
// 수동 실행
|
|
export async function executeCrawl(req: AuthenticatedRequest, res: Response) {
|
|
try {
|
|
const config = await CrawlService.getConfigById(req.params.id);
|
|
if (!config) return res.status(404).json({ success: false, message: "설정을 찾을 수 없습니다." });
|
|
|
|
const result = await CrawlService.executeCrawl(config);
|
|
return res.json({ success: true, data: result });
|
|
} catch (error: any) {
|
|
logger.error("크롤링 수동 실행 실패:", error);
|
|
return res.status(500).json({ success: false, message: error.message });
|
|
}
|
|
}
|
|
|
|
// 실행 로그 조회
|
|
export async function getCrawlLogs(req: AuthenticatedRequest, res: Response) {
|
|
try {
|
|
const limit = parseInt(req.query.limit as string) || 20;
|
|
const logs = await CrawlService.getLogs(req.params.id, limit);
|
|
return res.json({ success: true, data: logs });
|
|
} catch (error: any) {
|
|
logger.error("크롤링 로그 조회 실패:", error);
|
|
return res.status(500).json({ success: false, message: error.message });
|
|
}
|
|
}
|