6 changed files with 8268 additions and 0 deletions
@ -0,0 +1,175 @@ |
|||
package com.ruoyi.api.controller; |
|||
|
|||
import org.apache.xmlbeans.XmlCursor; |
|||
import org.apache.poi.openxml4j.opc.OPCPackage; |
|||
import org.apache.poi.xwpf.usermodel.*; |
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.parser.Parser; |
|||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; |
|||
import org.springframework.web.bind.annotation.GetMapping; |
|||
import org.springframework.web.bind.annotation.RequestMapping; |
|||
import org.springframework.web.bind.annotation.RestController; |
|||
import org.apache.poi.ooxml.POIXMLDocumentPart; |
|||
import org.apache.poi.xwpf.usermodel.XWPFPictureData; |
|||
|
|||
import java.util.Base64; |
|||
import java.util.List; |
|||
import java.util.logging.Logger; |
|||
|
|||
@RestController |
|||
@RequestMapping("/api") |
|||
public class MathMLController { |
|||
|
|||
private static final Logger log = Logger.getLogger(MathMLController.class.getName()); |
|||
|
|||
@GetMapping("/getWordToHtml") |
|||
public String t1() { |
|||
String filePath = "C:\\D\\test.docx"; // ✅ 你的文件路径
|
|||
StringBuilder html = new StringBuilder(); |
|||
|
|||
// 添加 MathJax 支持 MathML 渲染
|
|||
|
|||
try (OPCPackage opc = OPCPackage.open(filePath); |
|||
XWPFDocument doc = new XWPFDocument(opc)) { |
|||
|
|||
List<XWPFParagraph> paragraphs = doc.getParagraphs(); |
|||
List<XWPFTable> tables = doc.getTables(); |
|||
|
|||
// 处理段落
|
|||
for (XWPFParagraph para : paragraphs) { |
|||
handleParagraph(para, html); |
|||
} |
|||
|
|||
// 处理表格(含公式)
|
|||
for (XWPFTable table : tables) { |
|||
html.append("<table border='1' style='border-collapse: collapse; margin: 10px 0;'>"); |
|||
for (XWPFTableRow row : table.getRows()) { |
|||
html.append("<tr>"); |
|||
for (XWPFTableCell cell : row.getTableCells()) { |
|||
html.append("<td style='padding: 8px;'>"); |
|||
for (XWPFParagraph cellPara : cell.getParagraphs()) { |
|||
handleParagraph(cellPara, html); |
|||
} |
|||
html.append("</td>"); |
|||
} |
|||
html.append("</tr>"); |
|||
} |
|||
html.append("</table>"); |
|||
} |
|||
|
|||
} catch (Exception e) { |
|||
html.append("<p style='color:red;'>").append(e.getMessage()).append("</p>"); |
|||
} |
|||
System.out.println(html.toString()); |
|||
return html.toString(); |
|||
} |
|||
private void handleParagraph(XWPFParagraph para, StringBuilder html) { |
|||
System.out.println("处理段落: " + para.getText()); |
|||
|
|||
CTP ctp = para.getCTP(); |
|||
XmlCursor cursor = ctp.newCursor(); |
|||
cursor.selectPath("./*"); |
|||
|
|||
StringBuilder paraHtml = new StringBuilder("<p>"); |
|||
|
|||
while (cursor.toNextSelection()) { |
|||
String nodeXml = cursor.xmlText(); |
|||
|
|||
if (nodeXml.contains("<m:oMath") || nodeXml.contains("<m:oMathPara")) { |
|||
System.out.println("✅ 发现公式!"); |
|||
String mathml = MathMLConverter.convertOmmlToMathML(nodeXml); |
|||
paraHtml.append(mathml); |
|||
} else if (nodeXml.startsWith("<w:r ")) { |
|||
// 处理文本 run(可能包含文字、图片)
|
|||
processRunXml(nodeXml, para, paraHtml); |
|||
} |
|||
} |
|||
cursor.dispose(); |
|||
|
|||
if (paraHtml.length() > 3) { |
|||
paraHtml.append("</p>"); |
|||
html.append(paraHtml); |
|||
} |
|||
} |
|||
|
|||
private void processRunXml(String xml, XWPFParagraph para, StringBuilder html) { |
|||
try { |
|||
// 使用 Jsoup 解析 XML
|
|||
Document doc = Jsoup.parse(xml, "", Parser.xmlParser()); |
|||
Element wR = doc.child(0); |
|||
|
|||
// 查找 blip 标签中的 r:embed
|
|||
Element blip = wR.selectFirst("a|blip"); |
|||
String relId = null; |
|||
if (blip != null) { |
|||
relId = blip.attr("r:embed"); |
|||
} |
|||
|
|||
// 如果没找到,尝试从其他标签找 r:embed
|
|||
if ((relId == null || relId.isEmpty())) { |
|||
for (Element e : wR.select("[r|embed]")) { |
|||
relId = e.attr("r:embed"); |
|||
if (relId != null && !relId.isEmpty()) break; |
|||
} |
|||
} |
|||
|
|||
// 情况1:找到了 relId → 提取图片
|
|||
if (relId != null && !relId.isEmpty()) { |
|||
POIXMLDocumentPart imagePart = para.getDocument().getRelationById(relId); |
|||
if (imagePart instanceof XWPFPictureData) { |
|||
XWPFPictureData pictureData = (XWPFPictureData) imagePart; |
|||
byte[] imageData = pictureData.getData(); |
|||
|
|||
String ext; |
|||
int pictureType = pictureData.getPictureType(); |
|||
if (pictureType == XWPFDocument.PICTURE_TYPE_JPEG) { |
|||
ext = "jpeg"; |
|||
} else if (pictureType == XWPFDocument.PICTURE_TYPE_PNG) { |
|||
ext = "png"; |
|||
} else if (pictureType == XWPFDocument.PICTURE_TYPE_GIF) { |
|||
ext = "gif"; |
|||
} else if (pictureType == XWPFDocument.PICTURE_TYPE_BMP) { |
|||
ext = "bmp"; |
|||
} else { |
|||
ext = "png"; // 默认值
|
|||
} |
|||
|
|||
String base64 = Base64.getEncoder().encodeToString(imageData); |
|||
String imgUrl = "data:image/" + ext + ";base64," + base64; |
|||
html.append("<img src=\"").append(imgUrl) |
|||
.append("\" style=\"max-width:100%;height:auto;vertical-align:middle;\" />"); |
|||
return; |
|||
} |
|||
} |
|||
|
|||
// 情况2:没有图片,提取文本
|
|||
String text = extractTextFromRunXml(xml); |
|||
if (text != null && !text.trim().isEmpty()) { |
|||
html.append(text); |
|||
} |
|||
|
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
html.append("<span style='color:red;'>[图片解析错误]</span>"); |
|||
} |
|||
} |
|||
|
|||
// 辅助方法:从 <w:r> XML 中提取文本
|
|||
private String extractTextFromRunXml(String xml) { |
|||
try { |
|||
javax.xml.parsers.DocumentBuilder db = javax.xml.parsers.DocumentBuilderFactory.newInstance().newDocumentBuilder(); |
|||
org.w3c.dom.Document doc = db.parse(new java.io.ByteArrayInputStream(xml.getBytes())); |
|||
org.w3c.dom.NodeList texts = doc.getElementsByTagName("w:t"); |
|||
StringBuilder sb = new StringBuilder(); |
|||
for (int i = 0; i < texts.getLength(); i++) { |
|||
sb.append(texts.item(i).getTextContent()); |
|||
} |
|||
return sb.toString(); |
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
return ""; |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,100 @@ |
|||
package com.ruoyi.api.controller; |
|||
|
|||
import lombok.var; |
|||
import net.sf.saxon.s9api.*; |
|||
|
|||
import javax.xml.transform.*; |
|||
import javax.xml.transform.stream.StreamResult; |
|||
import javax.xml.transform.stream.StreamSource; |
|||
import java.io.InputStream; |
|||
import java.io.StringReader; |
|||
import java.io.StringWriter; |
|||
|
|||
public class MathMLConverter { |
|||
|
|||
private static final Processor processor = new Processor(false); |
|||
private static final XsltCompiler xsltCompiler = processor.newXsltCompiler(); // ✅ 正确类型
|
|||
|
|||
// 加载你已有的 OMML2MML.XSL
|
|||
private static final XsltExecutable executable; |
|||
|
|||
static { |
|||
try (var xslStream = MathMLConverter.class.getResourceAsStream("/OMML2MML.XSL")) { |
|||
if (xslStream == null) { |
|||
throw new RuntimeException("❌ XSLT file '/OMML2MML.XSL' not found in resources!"); |
|||
} |
|||
executable = xsltCompiler.compile(new StreamSource(xslStream)); // ✅ 使用 xsltCompiler
|
|||
} catch (Exception e) { |
|||
throw new ExceptionInInitializerError("❌ Failed to load XSLT: " + e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 将 OMML XML 字符串转换为 MathML |
|||
*/ |
|||
public static String convertOmmlToMathML(String omml) { |
|||
try { |
|||
// 加载 XSL 文件
|
|||
InputStream xslStream = MathMLConverter.class.getClassLoader() |
|||
.getResourceAsStream("OMML2MML.XSL"); |
|||
if (xslStream == null) { |
|||
throw new RuntimeException("XSL 文件未找到,请检查路径:OMML2MML.XSL"); |
|||
} |
|||
|
|||
Source xslSource = new StreamSource(xslStream); |
|||
Transformer transformer = TransformerFactory.newInstance().newTransformer(xslSource); |
|||
|
|||
// 设置输出属性
|
|||
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); // ❌ 不要 XML 声明
|
|||
transformer.setOutputProperty(OutputKeys.INDENT, "no"); |
|||
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "0"); |
|||
|
|||
// 输入 OMML
|
|||
Source source = new StreamSource(new StringReader(omml)); |
|||
|
|||
// 输出
|
|||
StringWriter stringWriter = new StringWriter(); |
|||
Result result = new StreamResult(stringWriter); |
|||
transformer.transform(source, result); |
|||
|
|||
String mathml = stringWriter.toString().trim(); |
|||
|
|||
// 🔧 清理和修复 MathML
|
|||
mathml = cleanupMathML(mathml); |
|||
|
|||
return mathml; |
|||
|
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
return "<math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mtext>公式解析错误</mtext></math>"; |
|||
} |
|||
} |
|||
|
|||
// 新增:清理 MathML 输出
|
|||
private static String cleanupMathML(String mathml) { |
|||
if (mathml == null || mathml.isEmpty()) { |
|||
return ""; |
|||
} |
|||
|
|||
// 1. 移除 XML 声明(如果还在)
|
|||
mathml = mathml.replaceFirst("(?s)<!\\?xml.*?\\?>\\s*", ""); |
|||
|
|||
// 2. 替换 mml:math -> math,并修复命名空间
|
|||
mathml = mathml.replaceAll("mml:math", "math"); |
|||
mathml = mathml.replaceAll("xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"", "xmlns=\"http://www.w3.org/1998/Math/MathML\""); |
|||
mathml = mathml.replaceAll("xmlns:m=\"[^\"]*\"", ""); // 移除 m: 命名空间
|
|||
|
|||
// 3. 移除多余的 mml: 前缀
|
|||
mathml = mathml.replaceAll("mml:([a-z])", "$1"); // mml:mi -> mi, mml:mo -> mo 等
|
|||
|
|||
// 4. 确保根元素是 <math xmlns="...">
|
|||
if (mathml.startsWith("<math ") && !mathml.contains("xmlns=\"http://www.w3.org/1998/Math/MathML\"")) { |
|||
mathml = mathml.replaceFirst("<math ", "<math xmlns=\"http://www.w3.org/1998/Math/MathML\" "); |
|||
} else if (!mathml.startsWith("<math")) { |
|||
// 如果根标签不是 math,手动包装
|
|||
mathml = "<math xmlns=\"http://www.w3.org/1998/Math/MathML\">" + mathml + "</math>"; |
|||
} |
|||
|
|||
return mathml.trim(); |
|||
} |
|||
} |
|||
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
Loading…
Reference in new issue