feat: add web crawling management functionality
- Introduced a new crawling management feature allowing users to configure, execute, and log web crawls. - Added CRUD operations for crawl configurations, including URL analysis and preview capabilities. - Implemented a new service for handling crawling logic and scheduling tasks. - Integrated cheerio for HTML parsing and axios for HTTP requests. - Created a sample HTML page for testing crawling functionality. This commit enhances the application's data collection capabilities from external websites.
This commit is contained in:
124
backend-node/src/controllers/crawlController.ts
Normal file
124
backend-node/src/controllers/crawlController.ts
Normal file
@@ -0,0 +1,124 @@
|
||||
import { Request, Response } from "express";
|
||||
import { CrawlService } from "../services/crawlService";
|
||||
import { logger } from "../utils/logger";
|
||||
|
||||
// Express request augmented with the authenticated caller's identity.
// NOTE(review): presumably populated by an auth middleware upstream — confirm;
// handlers below treat `user` as optional and fall back when it is absent.
interface AuthenticatedRequest extends Request {
  user?: { companyCode: string; userId: string };
}
|
||||
|
||||
// 설정 목록 조회
|
||||
export async function getCrawlConfigs(req: AuthenticatedRequest, res: Response) {
|
||||
try {
|
||||
const companyCode = req.user?.companyCode || "*";
|
||||
const configs = await CrawlService.getConfigs(companyCode);
|
||||
return res.json({ success: true, data: configs });
|
||||
} catch (error: any) {
|
||||
logger.error("크롤링 설정 조회 실패:", error);
|
||||
return res.status(500).json({ success: false, message: error.message });
|
||||
}
|
||||
}
|
||||
|
||||
// 설정 상세 조회
|
||||
export async function getCrawlConfig(req: AuthenticatedRequest, res: Response) {
|
||||
try {
|
||||
const config = await CrawlService.getConfigById(req.params.id);
|
||||
if (!config) return res.status(404).json({ success: false, message: "설정을 찾을 수 없습니다." });
|
||||
return res.json({ success: true, data: config });
|
||||
} catch (error: any) {
|
||||
logger.error("크롤링 설정 상세 조회 실패:", error);
|
||||
return res.status(500).json({ success: false, message: error.message });
|
||||
}
|
||||
}
|
||||
|
||||
// 설정 생성
|
||||
export async function createCrawlConfig(req: AuthenticatedRequest, res: Response) {
|
||||
try {
|
||||
const data = {
|
||||
...req.body,
|
||||
company_code: req.user?.companyCode || req.body.company_code,
|
||||
writer: req.user?.userId,
|
||||
};
|
||||
const config = await CrawlService.createConfig(data);
|
||||
return res.json({ success: true, data: config });
|
||||
} catch (error: any) {
|
||||
logger.error("크롤링 설정 생성 실패:", error);
|
||||
return res.status(500).json({ success: false, message: error.message });
|
||||
}
|
||||
}
|
||||
|
||||
// 설정 수정
|
||||
export async function updateCrawlConfig(req: AuthenticatedRequest, res: Response) {
|
||||
try {
|
||||
const config = await CrawlService.updateConfig(req.params.id, req.body);
|
||||
if (!config) return res.status(404).json({ success: false, message: "설정을 찾을 수 없습니다." });
|
||||
return res.json({ success: true, data: config });
|
||||
} catch (error: any) {
|
||||
logger.error("크롤링 설정 수정 실패:", error);
|
||||
return res.status(500).json({ success: false, message: error.message });
|
||||
}
|
||||
}
|
||||
|
||||
// 설정 삭제
|
||||
export async function deleteCrawlConfig(req: AuthenticatedRequest, res: Response) {
|
||||
try {
|
||||
await CrawlService.deleteConfig(req.params.id);
|
||||
return res.json({ success: true });
|
||||
} catch (error: any) {
|
||||
logger.error("크롤링 설정 삭제 실패:", error);
|
||||
return res.status(500).json({ success: false, message: error.message });
|
||||
}
|
||||
}
|
||||
|
||||
// 미리보기
|
||||
export async function previewCrawl(req: AuthenticatedRequest, res: Response) {
|
||||
try {
|
||||
const { url, row_selector, column_mappings, method, headers, request_body } = req.body;
|
||||
if (!url) return res.status(400).json({ success: false, message: "URL은 필수입니다." });
|
||||
|
||||
const result = await CrawlService.preview(url, row_selector, column_mappings || [], method, headers, request_body);
|
||||
return res.json({ success: true, data: result });
|
||||
} catch (error: any) {
|
||||
logger.error("크롤링 미리보기 실패:", error);
|
||||
return res.status(500).json({ success: false, message: error.message });
|
||||
}
|
||||
}
|
||||
|
||||
// URL 자동 분석 — 페이지의 테이블/리스트 구조를 감지
|
||||
export async function analyzeUrl(req: AuthenticatedRequest, res: Response) {
|
||||
try {
|
||||
const { url } = req.body;
|
||||
if (!url) return res.status(400).json({ success: false, message: "URL은 필수입니다." });
|
||||
|
||||
const result = await CrawlService.analyzeUrl(url);
|
||||
return res.json({ success: true, data: result });
|
||||
} catch (error: any) {
|
||||
logger.error("URL 분석 실패:", error);
|
||||
return res.status(500).json({ success: false, message: error.message });
|
||||
}
|
||||
}
|
||||
|
||||
// 수동 실행
|
||||
export async function executeCrawl(req: AuthenticatedRequest, res: Response) {
|
||||
try {
|
||||
const config = await CrawlService.getConfigById(req.params.id);
|
||||
if (!config) return res.status(404).json({ success: false, message: "설정을 찾을 수 없습니다." });
|
||||
|
||||
const result = await CrawlService.executeCrawl(config);
|
||||
return res.json({ success: true, data: result });
|
||||
} catch (error: any) {
|
||||
logger.error("크롤링 수동 실행 실패:", error);
|
||||
return res.status(500).json({ success: false, message: error.message });
|
||||
}
|
||||
}
|
||||
|
||||
// 실행 로그 조회
|
||||
export async function getCrawlLogs(req: AuthenticatedRequest, res: Response) {
|
||||
try {
|
||||
const limit = parseInt(req.query.limit as string) || 20;
|
||||
const logs = await CrawlService.getLogs(req.params.id, limit);
|
||||
return res.json({ success: true, data: logs });
|
||||
} catch (error: any) {
|
||||
logger.error("크롤링 로그 조회 실패:", error);
|
||||
return res.status(500).json({ success: false, message: error.message });
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user