6 changed files with 8268 additions and 0 deletions
@ -0,0 +1,175 @@ |
|||||
|
package com.ruoyi.api.controller; |
||||
|
|
||||
|
import org.apache.xmlbeans.XmlCursor; |
||||
|
import org.apache.poi.openxml4j.opc.OPCPackage; |
||||
|
import org.apache.poi.xwpf.usermodel.*; |
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.parser.Parser; |
||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; |
||||
|
import org.springframework.web.bind.annotation.GetMapping; |
||||
|
import org.springframework.web.bind.annotation.RequestMapping; |
||||
|
import org.springframework.web.bind.annotation.RestController; |
||||
|
import org.apache.poi.ooxml.POIXMLDocumentPart; |
||||
|
import org.apache.poi.xwpf.usermodel.XWPFPictureData; |
||||
|
|
||||
|
import java.util.Base64; |
||||
|
import java.util.List; |
||||
|
import java.util.logging.Logger; |
||||
|
|
||||
|
@RestController |
||||
|
@RequestMapping("/api") |
||||
|
public class MathMLController { |
||||
|
|
||||
|
private static final Logger log = Logger.getLogger(MathMLController.class.getName()); |
||||
|
|
||||
|
@GetMapping("/getWordToHtml") |
||||
|
public String t1() { |
||||
|
String filePath = "C:\\D\\test.docx"; // ✅ 你的文件路径
|
||||
|
StringBuilder html = new StringBuilder(); |
||||
|
|
||||
|
// 添加 MathJax 支持 MathML 渲染
|
||||
|
|
||||
|
try (OPCPackage opc = OPCPackage.open(filePath); |
||||
|
XWPFDocument doc = new XWPFDocument(opc)) { |
||||
|
|
||||
|
List<XWPFParagraph> paragraphs = doc.getParagraphs(); |
||||
|
List<XWPFTable> tables = doc.getTables(); |
||||
|
|
||||
|
// 处理段落
|
||||
|
for (XWPFParagraph para : paragraphs) { |
||||
|
handleParagraph(para, html); |
||||
|
} |
||||
|
|
||||
|
// 处理表格(含公式)
|
||||
|
for (XWPFTable table : tables) { |
||||
|
html.append("<table border='1' style='border-collapse: collapse; margin: 10px 0;'>"); |
||||
|
for (XWPFTableRow row : table.getRows()) { |
||||
|
html.append("<tr>"); |
||||
|
for (XWPFTableCell cell : row.getTableCells()) { |
||||
|
html.append("<td style='padding: 8px;'>"); |
||||
|
for (XWPFParagraph cellPara : cell.getParagraphs()) { |
||||
|
handleParagraph(cellPara, html); |
||||
|
} |
||||
|
html.append("</td>"); |
||||
|
} |
||||
|
html.append("</tr>"); |
||||
|
} |
||||
|
html.append("</table>"); |
||||
|
} |
||||
|
|
||||
|
} catch (Exception e) { |
||||
|
html.append("<p style='color:red;'>").append(e.getMessage()).append("</p>"); |
||||
|
} |
||||
|
System.out.println(html.toString()); |
||||
|
return html.toString(); |
||||
|
} |
||||
|
private void handleParagraph(XWPFParagraph para, StringBuilder html) { |
||||
|
System.out.println("处理段落: " + para.getText()); |
||||
|
|
||||
|
CTP ctp = para.getCTP(); |
||||
|
XmlCursor cursor = ctp.newCursor(); |
||||
|
cursor.selectPath("./*"); |
||||
|
|
||||
|
StringBuilder paraHtml = new StringBuilder("<p>"); |
||||
|
|
||||
|
while (cursor.toNextSelection()) { |
||||
|
String nodeXml = cursor.xmlText(); |
||||
|
|
||||
|
if (nodeXml.contains("<m:oMath") || nodeXml.contains("<m:oMathPara")) { |
||||
|
System.out.println("✅ 发现公式!"); |
||||
|
String mathml = MathMLConverter.convertOmmlToMathML(nodeXml); |
||||
|
paraHtml.append(mathml); |
||||
|
} else if (nodeXml.startsWith("<w:r ")) { |
||||
|
// 处理文本 run(可能包含文字、图片)
|
||||
|
processRunXml(nodeXml, para, paraHtml); |
||||
|
} |
||||
|
} |
||||
|
cursor.dispose(); |
||||
|
|
||||
|
if (paraHtml.length() > 3) { |
||||
|
paraHtml.append("</p>"); |
||||
|
html.append(paraHtml); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private void processRunXml(String xml, XWPFParagraph para, StringBuilder html) { |
||||
|
try { |
||||
|
// 使用 Jsoup 解析 XML
|
||||
|
Document doc = Jsoup.parse(xml, "", Parser.xmlParser()); |
||||
|
Element wR = doc.child(0); |
||||
|
|
||||
|
// 查找 blip 标签中的 r:embed
|
||||
|
Element blip = wR.selectFirst("a|blip"); |
||||
|
String relId = null; |
||||
|
if (blip != null) { |
||||
|
relId = blip.attr("r:embed"); |
||||
|
} |
||||
|
|
||||
|
// 如果没找到,尝试从其他标签找 r:embed
|
||||
|
if ((relId == null || relId.isEmpty())) { |
||||
|
for (Element e : wR.select("[r|embed]")) { |
||||
|
relId = e.attr("r:embed"); |
||||
|
if (relId != null && !relId.isEmpty()) break; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 情况1:找到了 relId → 提取图片
|
||||
|
if (relId != null && !relId.isEmpty()) { |
||||
|
POIXMLDocumentPart imagePart = para.getDocument().getRelationById(relId); |
||||
|
if (imagePart instanceof XWPFPictureData) { |
||||
|
XWPFPictureData pictureData = (XWPFPictureData) imagePart; |
||||
|
byte[] imageData = pictureData.getData(); |
||||
|
|
||||
|
String ext; |
||||
|
int pictureType = pictureData.getPictureType(); |
||||
|
if (pictureType == XWPFDocument.PICTURE_TYPE_JPEG) { |
||||
|
ext = "jpeg"; |
||||
|
} else if (pictureType == XWPFDocument.PICTURE_TYPE_PNG) { |
||||
|
ext = "png"; |
||||
|
} else if (pictureType == XWPFDocument.PICTURE_TYPE_GIF) { |
||||
|
ext = "gif"; |
||||
|
} else if (pictureType == XWPFDocument.PICTURE_TYPE_BMP) { |
||||
|
ext = "bmp"; |
||||
|
} else { |
||||
|
ext = "png"; // 默认值
|
||||
|
} |
||||
|
|
||||
|
String base64 = Base64.getEncoder().encodeToString(imageData); |
||||
|
String imgUrl = "data:image/" + ext + ";base64," + base64; |
||||
|
html.append("<img src=\"").append(imgUrl) |
||||
|
.append("\" style=\"max-width:100%;height:auto;vertical-align:middle;\" />"); |
||||
|
return; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 情况2:没有图片,提取文本
|
||||
|
String text = extractTextFromRunXml(xml); |
||||
|
if (text != null && !text.trim().isEmpty()) { |
||||
|
html.append(text); |
||||
|
} |
||||
|
|
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
html.append("<span style='color:red;'>[图片解析错误]</span>"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 辅助方法:从 <w:r> XML 中提取文本
|
||||
|
private String extractTextFromRunXml(String xml) { |
||||
|
try { |
||||
|
javax.xml.parsers.DocumentBuilder db = javax.xml.parsers.DocumentBuilderFactory.newInstance().newDocumentBuilder(); |
||||
|
org.w3c.dom.Document doc = db.parse(new java.io.ByteArrayInputStream(xml.getBytes())); |
||||
|
org.w3c.dom.NodeList texts = doc.getElementsByTagName("w:t"); |
||||
|
StringBuilder sb = new StringBuilder(); |
||||
|
for (int i = 0; i < texts.getLength(); i++) { |
||||
|
sb.append(texts.item(i).getTextContent()); |
||||
|
} |
||||
|
return sb.toString(); |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
return ""; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,100 @@ |
|||||
|
package com.ruoyi.api.controller; |
||||
|
|
||||
|
import lombok.var; |
||||
|
import net.sf.saxon.s9api.*; |
||||
|
|
||||
|
import javax.xml.transform.*; |
||||
|
import javax.xml.transform.stream.StreamResult; |
||||
|
import javax.xml.transform.stream.StreamSource; |
||||
|
import java.io.InputStream; |
||||
|
import java.io.StringReader; |
||||
|
import java.io.StringWriter; |
||||
|
|
||||
|
public class MathMLConverter { |
||||
|
|
||||
|
private static final Processor processor = new Processor(false); |
||||
|
private static final XsltCompiler xsltCompiler = processor.newXsltCompiler(); // ✅ 正确类型
|
||||
|
|
||||
|
// 加载你已有的 OMML2MML.XSL
|
||||
|
private static final XsltExecutable executable; |
||||
|
|
||||
|
static { |
||||
|
try (var xslStream = MathMLConverter.class.getResourceAsStream("/OMML2MML.XSL")) { |
||||
|
if (xslStream == null) { |
||||
|
throw new RuntimeException("❌ XSLT file '/OMML2MML.XSL' not found in resources!"); |
||||
|
} |
||||
|
executable = xsltCompiler.compile(new StreamSource(xslStream)); // ✅ 使用 xsltCompiler
|
||||
|
} catch (Exception e) { |
||||
|
throw new ExceptionInInitializerError("❌ Failed to load XSLT: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 将 OMML XML 字符串转换为 MathML |
||||
|
*/ |
||||
|
public static String convertOmmlToMathML(String omml) { |
||||
|
try { |
||||
|
// 加载 XSL 文件
|
||||
|
InputStream xslStream = MathMLConverter.class.getClassLoader() |
||||
|
.getResourceAsStream("OMML2MML.XSL"); |
||||
|
if (xslStream == null) { |
||||
|
throw new RuntimeException("XSL 文件未找到,请检查路径:OMML2MML.XSL"); |
||||
|
} |
||||
|
|
||||
|
Source xslSource = new StreamSource(xslStream); |
||||
|
Transformer transformer = TransformerFactory.newInstance().newTransformer(xslSource); |
||||
|
|
||||
|
// 设置输出属性
|
||||
|
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); // ❌ 不要 XML 声明
|
||||
|
transformer.setOutputProperty(OutputKeys.INDENT, "no"); |
||||
|
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "0"); |
||||
|
|
||||
|
// 输入 OMML
|
||||
|
Source source = new StreamSource(new StringReader(omml)); |
||||
|
|
||||
|
// 输出
|
||||
|
StringWriter stringWriter = new StringWriter(); |
||||
|
Result result = new StreamResult(stringWriter); |
||||
|
transformer.transform(source, result); |
||||
|
|
||||
|
String mathml = stringWriter.toString().trim(); |
||||
|
|
||||
|
// 🔧 清理和修复 MathML
|
||||
|
mathml = cleanupMathML(mathml); |
||||
|
|
||||
|
return mathml; |
||||
|
|
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
return "<math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mtext>公式解析错误</mtext></math>"; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 新增:清理 MathML 输出
|
||||
|
private static String cleanupMathML(String mathml) { |
||||
|
if (mathml == null || mathml.isEmpty()) { |
||||
|
return ""; |
||||
|
} |
||||
|
|
||||
|
// 1. 移除 XML 声明(如果还在)
|
||||
|
mathml = mathml.replaceFirst("(?s)<!\\?xml.*?\\?>\\s*", ""); |
||||
|
|
||||
|
// 2. 替换 mml:math -> math,并修复命名空间
|
||||
|
mathml = mathml.replaceAll("mml:math", "math"); |
||||
|
mathml = mathml.replaceAll("xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"", "xmlns=\"http://www.w3.org/1998/Math/MathML\""); |
||||
|
mathml = mathml.replaceAll("xmlns:m=\"[^\"]*\"", ""); // 移除 m: 命名空间
|
||||
|
|
||||
|
// 3. 移除多余的 mml: 前缀
|
||||
|
mathml = mathml.replaceAll("mml:([a-z])", "$1"); // mml:mi -> mi, mml:mo -> mo 等
|
||||
|
|
||||
|
// 4. 确保根元素是 <math xmlns="...">
|
||||
|
if (mathml.startsWith("<math ") && !mathml.contains("xmlns=\"http://www.w3.org/1998/Math/MathML\"")) { |
||||
|
mathml = mathml.replaceFirst("<math ", "<math xmlns=\"http://www.w3.org/1998/Math/MathML\" "); |
||||
|
} else if (!mathml.startsWith("<math")) { |
||||
|
// 如果根标签不是 math,手动包装
|
||||
|
mathml = "<math xmlns=\"http://www.w3.org/1998/Math/MathML\">" + mathml + "</math>"; |
||||
|
} |
||||
|
|
||||
|
return mathml.trim(); |
||||
|
} |
||||
|
} |
||||
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
Loading…
Reference in new issue