You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
199 lines
6.2 KiB
199 lines
6.2 KiB
import os
|
|
import uuid
|
|
import shutil
|
|
from datetime import datetime
|
|
from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, File
|
|
from sqlalchemy import select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from database import get_db
|
|
from models import AuditLog
|
|
from schemas import DocumentUploadOut, DocumentParseResult
|
|
from config import settings
|
|
from dependencies import get_current_user
|
|
|
|
# Router for the document endpoints below; every route registered on it is
# served under the /api/document prefix and tagged "document".
router = APIRouter(prefix="/api/document", tags=["document"])
|
|
|
|
|
|
@router.post("/upload", response_model=DocumentUploadOut)
async def upload_document(
    file: UploadFile = File(...),
    request: Request = None,
    user: dict = Depends(get_current_user),
):
    """Save an uploaded file into ``settings.UPLOAD_DIR`` and describe it.

    The file is stored as ``<uuid4><original extension>``; the generated UUID
    is returned as ``file_id`` so later calls can refer to the stored file.

    Args:
        file: The multipart upload.
        request: Unused by this handler; kept for interface compatibility.
        user: Result of ``get_current_user``; not read here, but the
            dependency still runs for every request.

    Returns:
        A ``DocumentUploadOut`` with the new id, the client-supplied filename
        (``"unknown"`` if none was sent), the stored size in bytes, the
        content type, and the upload timestamp.

    Raises:
        HTTPException: 400 when the upload is larger than
            ``settings.MAX_UPLOAD_SIZE_MB`` megabytes.
    """
    max_size = settings.MAX_UPLOAD_SIZE_MB * 1024 * 1024

    # Read at most one byte past the limit: that is enough to detect an
    # oversized upload without pulling an arbitrarily large body into memory.
    content = await file.read(int(max_size) + 1)
    if len(content) > max_size:
        raise HTTPException(400, f"文件大小超过限制 ({settings.MAX_UPLOAD_SIZE_MB}MB)")

    original_name = file.filename or "unknown"
    file_id = uuid.uuid4()
    os.makedirs(settings.UPLOAD_DIR, exist_ok=True)

    # Only the extension of the client-supplied name is reused; the stem is
    # replaced by the generated UUID.
    ext = os.path.splitext(original_name)[1]
    file_path = os.path.join(settings.UPLOAD_DIR, f"{file_id}{ext}")

    with open(file_path, "wb") as f:
        f.write(content)

    return DocumentUploadOut(
        file_id=file_id,
        filename=original_name,
        file_size=len(content),
        content_type=file.content_type or "application/octet-stream",
        upload_time=datetime.utcnow(),
    )
|
|
|
|
|
|
@router.post("/parse/{file_id}", response_model=DocumentParseResult)
async def parse_document(
    file_id: uuid.UUID,
    request: Request,
    db: AsyncSession = Depends(get_db),
    user: dict = Depends(get_current_user),
):
    """Return the content (or a placeholder) for a previously stored file.

    The stored file is the first entry of ``settings.UPLOAD_DIR`` whose name
    starts with ``str(file_id)``. Files with a known text extension are read
    as UTF-8 (undecodable bytes are replaced); PDF, Word and Excel files only
    produce a placeholder string; anything else is reported as unsupported.
    An ``AuditLog`` row for the operation is added to the session and flushed.

    Args:
        file_id: Identifier of the stored file.
        request: Current request; its client host is recorded in the audit row.
        db: Database session used for the audit row.
        user: Current user mapping; ``user["id"]`` becomes the operator id.

    Returns:
        A ``DocumentParseResult`` with the stored filename, the content
        string, and a metadata dict (``file_size``, ``extension``, plus
        ``lines``/``chars`` for text files or ``type`` for the other cases).

    Raises:
        HTTPException: 404 when no stored file matches ``file_id``.
    """
    text_extensions = {
        ".txt", ".md", ".py", ".js", ".ts", ".json", ".xml", ".yaml",
        ".yml", ".csv", ".html", ".css", ".java", ".go", ".rs",
    }
    # Extension -> (label used in the placeholder text, metadata "type").
    placeholder_kinds = {
        ".pdf": ("PDF", "pdf"),
        ".doc": ("Word", "word"),
        ".docx": ("Word", "word"),
        ".xls": ("Excel", "excel"),
        ".xlsx": ("Excel", "excel"),
    }

    os.makedirs(settings.UPLOAD_DIR, exist_ok=True)

    prefix = str(file_id)
    found_filename = next(
        (name for name in os.listdir(settings.UPLOAD_DIR) if name.startswith(prefix)),
        None,
    )
    if found_filename is None:
        raise HTTPException(404, "文件不存在")
    found_file = os.path.join(settings.UPLOAD_DIR, found_filename)

    ext = os.path.splitext(found_filename)[1].lower()
    metadata = {"file_size": os.path.getsize(found_file), "extension": ext}

    if ext in text_extensions:
        with open(found_file, "r", encoding="utf-8", errors="replace") as f:
            content = f.read()
        metadata["lines"] = len(content.splitlines())
        metadata["chars"] = len(content)
    elif ext in placeholder_kinds:
        label, kind = placeholder_kinds[ext]
        content = f"[{label}文档解析] 文件: {found_filename}"
        metadata["type"] = kind
    else:
        # ``ext`` already carries its leading dot, so it is not prefixed with
        # another one (the message used to read "..xyz").
        content = f"[不支持的文件类型 {ext}] 文件: {found_filename}"
        metadata["type"] = "unsupported"

    audit = AuditLog(
        operator_id=uuid.UUID(user["id"]),
        action="document.parse",
        resource="document",
        resource_id=str(file_id),
        detail={"filename": found_filename, "ext": ext},
        ip_address=request.client.host if request.client else None,
    )
    db.add(audit)
    await db.flush()

    return DocumentParseResult(
        file_id=file_id,
        filename=found_filename,
        content=content,
        metadata=metadata,
    )
|
|
|
|
|
|
@router.delete("/{file_id}")
async def delete_document(
    file_id: uuid.UUID,
    request: Request,
    db: AsyncSession = Depends(get_db),
    user: dict = Depends(get_current_user),
):
    """Remove the stored file for ``file_id`` and record the deletion.

    The first entry of ``settings.UPLOAD_DIR`` whose name starts with
    ``str(file_id)`` is deleted; an ``AuditLog`` row is then added to the
    session and flushed.

    Args:
        file_id: Identifier of the stored file.
        request: Current request; its client host is recorded in the audit row.
        db: Database session used for the audit row.
        user: Current user mapping; ``user["id"]`` becomes the operator id.

    Returns:
        ``{"code": 200, "message": "已删除"}`` on success.

    Raises:
        HTTPException: 404 when no stored file matches ``file_id``.
    """
    upload_dir = settings.UPLOAD_DIR
    os.makedirs(upload_dir, exist_ok=True)

    id_prefix = str(file_id)
    target = next(
        (entry for entry in os.listdir(upload_dir) if entry.startswith(id_prefix)),
        None,
    )
    if target is None:
        raise HTTPException(404, "文件不存在")
    os.remove(os.path.join(upload_dir, target))

    db.add(
        AuditLog(
            operator_id=uuid.UUID(user["id"]),
            action="document.delete",
            resource="document",
            resource_id=id_prefix,
            ip_address=request.client.host if request.client else None,
        )
    )
    await db.flush()

    return {"code": 200, "message": "已删除"}
|
|
|
|
|
|
@router.post("/format")
async def format_document(
    payload: dict,
    request: Request,
    db: AsyncSession = Depends(get_db),
    user: dict = Depends(get_current_user),
):
    """Format the text in ``payload`` and record the operation.

    Args:
        payload: Request body. ``content`` (default ``""``) is the text to
            format and ``format_type`` (default ``"standard"``) selects the
            formatter applied by ``_apply_formatting``.
        request: Current request; its client host is recorded in the audit row.
        db: Database session used for the audit row.
        user: Current user mapping; ``user["id"]`` becomes the operator id.

    Returns:
        ``{"code": 200, "data": {"formatted": ..., "format_type": ...}}``.

    Raises:
        HTTPException: 400 when ``content`` or ``format_type`` is not a string.
    """
    content = payload.get("content", "")
    format_type = payload.get("format_type", "standard")

    # The body is an untyped dict, so a client can send null, numbers or
    # nested objects here. Reject them up front instead of failing inside the
    # formatter or writing a non-string resource id to the audit row.
    if not isinstance(content, str) or not isinstance(format_type, str):
        raise HTTPException(400, "content 和 format_type 必须为字符串")

    result = _apply_formatting(content, format_type)

    audit = AuditLog(
        operator_id=uuid.UUID(user["id"]),
        action="document.format",
        resource="document",
        resource_id=format_type,
        detail={"format_type": format_type, "original_length": len(content)},
        ip_address=request.client.host if request.client else None,
    )
    db.add(audit)
    await db.flush()

    return {"code": 200, "data": {"formatted": result, "format_type": format_type}}
|
|
|
|
|
|
def _apply_formatting(content: str, format_type: str) -> str:
    """Reformat *content* according to *format_type*.

    Supported values of *format_type*:

    * ``"standard"``: strip every line, drop blank ones, and join the rest
      with one empty line between them.
    * ``"markdown"``: emit a fixed title plus a UTC timestamp line, followed
      by each non-blank stripped line. A line that is not a heading (does not
      start with ``#``), is shorter than 60 characters, and ends with
      sentence punctuation is rendered as a block quote.
    * ``"json"``: pretty-print *content* if it is valid JSON; otherwise
      wrap it as ``{"content": ..., "lines": <line count>}``.

    Any other value returns *content* unchanged.

    Args:
        content: The text to format.
        format_type: Name of the formatter to apply.

    Returns:
        The formatted text.
    """
    raw_lines = content.splitlines()
    # Both line-based formats work on the stripped, non-blank lines.
    stripped = [text for text in (raw.strip() for raw in raw_lines) if text]

    if format_type == "standard":
        return "\n\n".join(stripped)

    if format_type == "markdown":
        # ASCII and full-width variants, so Chinese questions and
        # exclamations are quoted like their ASCII counterparts.
        sentence_endings = (".", "。", "?", "？", "!", "！")
        parts = [f"# 格式化文档\n\n> 处理时间: {datetime.utcnow().isoformat()}\n"]
        for line in stripped:
            is_short_sentence = (
                not line.startswith("#")
                and len(line) < 60
                and line.endswith(sentence_endings)
            )
            parts.append(f"> {line}\n" if is_short_sentence else line)
        return "\n\n".join(parts)

    if format_type == "json":
        import json

        try:
            parsed = json.loads(content)
        except json.JSONDecodeError:
            # Not JSON: wrap the raw text so the result is still valid JSON.
            return json.dumps(
                {"content": content, "lines": len(raw_lines)},
                ensure_ascii=False,
                indent=2,
            )
        return json.dumps(parsed, ensure_ascii=False, indent=2)

    return content
|