Java POI导出富文本的内容到word文档
一、需求:
使用富文本编辑器编辑内容后,提交到后台的数据都带有html标签。
如:<h1>标题头</h1><h2>第二个标题</h2><a href="www.baidu.com">百度搜索</a>
我们想把富文本数据转换为Word内容。
二、依赖
<!-- jsoup依赖 主要是解析图片标签,然后缩放图片大小-->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.12.1</version>
</dependency>
<!-- poi依赖-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
三、解决方案
Word是完全支持html标签的,但是我们获取到的富文本内容只是html片段,并不是完整的html文档,所以我们需要先补全html标签,然后按指定编码转码,最后输出。
1,接口类
package com.zl.exportword;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.awt.image.BufferedImage;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.List;
/**
* @author lei
* @version 1.0
* @date 2022/11/14 10:25
*/
@RestController
@RequestMapping("/export")
public class ExportController {
@RequestMapping(value = "/exportWord")
public void export(HttpServletRequest request, HttpServletResponse response) throws Exception {
try {
String tmpContent = "<h1>如何将富文本内容导出到word文档</h1><p style="color:red;font-size:20px;">采用poi将富文本内容导出到word文档</p><div style="background-color:green;">这是有背景颜色的div内容</div>n" +
"<img src="这里写base64后的图片编码">这是base64编码后的图片";
// 获取img图片标签
// 1.Jsoup解析html
Document document = Jsoup.parse(tmpContent);
// 获取所有img图片标签
Elements img = document.getElementsByTag("img");
int index = 0;
List<String> imgBase64List = new ArrayList<>();
for (Element element : img) {
imgBase64List.add(element.attr("src"));
// 处理特殊符号
String attrData = element.attr("src");
// base64编码后可能包含 + 特殊字符,所以需要转义
attrData = attrData.replaceAll("\+", "\\+");
tmpContent = tmpContent.replaceAll(attrData, "{{image_src" + index + "}}");
index++;
}
// 缩放图片大小,然后重新base64编码后替换到富文本内容里面导出word
index = 0;
String prefix = "data:image/png;base64,"; // base64编码前缀
for (String base64 : imgBase64List) {
if (StringUtils.isNotBlank(base64)) {
// 缩小图片
base64 = base64.replaceAll(prefix, "");
BufferedImage bufferedImage = ImageUtils.bytesToBufferedImage(ImageUtils.base64ToByte(base64));
if (bufferedImage == null) {
tmpContent = tmpContent.replaceAll("\{\{image_src" + index + "}}", "");
} else {
int height = bufferedImage.getHeight();
int width = bufferedImage.getWidth();
// 如果图片宽度大于650,图片缩放
//System.out.println("----"+width+"-----"+height);
if (width > 650) {
//高度等比缩放
height = (int)(height*650.0/width);
BufferedImage imgZoom = ImageUtils.resizeImage(bufferedImage, 700, height);
String imageToBase64 = ImageUtils.imageToBase64(ImageUtils.imageToBytes(imgZoom));
tmpContent = tmpContent.replaceAll("\{\{image_src" + index + "}}", prefix + imageToBase64);
} else {
tmpContent = tmpContent.replaceAll("\{\{image_src" + index + "}}", prefix + base64);
}
}
} else {
tmpContent = tmpContent.replaceAll("\{\{image_src" + index + "}}", "");
}
index++;
}
// 执行导出操作
WordUtil.exportHtmlToWord(request, response, tmpContent, "富文本内容导出word");
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* 倒入本地测试
* @throws Exception
*/
@RequestMapping(value = "/export")
public void export() throws Exception {
try {
StringBuilder sb = new StringBuilder("<h1>如何将富文本内容导出到word文档</h1><p style="color:red;font-size:20px;">采用poi将富文本内容导出到word文档</p><div style="background-color:green;">这是有背景颜色的div内容</div>n" +
"<img src="").append("");
//提前转换过base64编码的图片,由于编码文本太长,我提前存储到txt中,再通过io流读取出来
File file = new File("/Users/lei/base.txt");
FileInputStream fileInputStream = new FileInputStream(file);
BufferedInputStream bis = new BufferedInputStream(fileInputStream);
byte[] bytes = new byte[1024];
int len = -1;
while ((len=bis.read(bytes))!=-1){
sb.append(new String(bytes, 0, len));
}
sb.append("">这是base64编码后的图片");
String tmpContent = sb.toString();
// 获取img图片标签
// 1.Jsoup解析html
Document document = Jsoup.parse(tmpContent);
// 获取所有img图片标签
Elements imgs = document.getElementsByTag("img");
int index = 0;
List<String> imgBase64List = new ArrayList<>();
for (Element element : imgs) {
imgBase64List.add(element.attr("src"));
// 处理特殊符号
String attrData = element.attr("src");
// base64编码后可能包含 + 特殊字符,所以需要转义
attrData = attrData.replaceAll("\+", "\\+");
tmpContent = tmpContent.replaceAll(attrData, "{{image_src" + index + "}}");
index++;
}
// 缩放图片大小,然后重新base64编码后替换到富文本内容里面导出word
index = 0;
String prefix = "data:image/png;base64,"; // base64编码前缀
for (String base64 : imgBase64List) {
if (StringUtils.isNotBlank(base64)) {
// 缩小图片
base64 = base64.replaceAll(prefix, "");
BufferedImage bufferedImage = ImageUtils.bytesToBufferedImage(ImageUtils.base64ToByte(base64));
if (bufferedImage == null) {
tmpContent = tmpContent.replaceAll("\{\{image_src" + index + "}}", "");
} else {
int height = bufferedImage.getHeight();
int width = bufferedImage.getWidth();
// 如果图片宽度大于650,图片缩放
System.out.println("----"+width+"-----"+height);
if (width > 650) {
//高度等比缩放
height = (int)(height*650.0/width);
BufferedImage imgZoom = ImageUtils.resizeImage(bufferedImage, 650, height);
String imageToBase64 = ImageUtils.imageToBase64(ImageUtils.imageToBytes(imgZoom));
tmpContent = tmpContent.replaceAll("\{\{image_src" + index + "}}", prefix + imageToBase64);
} else {
tmpContent = tmpContent.replaceAll("\{\{image_src" + index + "}}", prefix + base64);
}
}
} else {
tmpContent = tmpContent.replaceAll("\{\{image_src" + index + "}}", "");
}
index++;
}
// 执行导出操作
WordUtil.exportHtmlToWord("/Users/lei/", tmpContent, "富文本内容导出word.docx");
} catch (Exception e) {
e.printStackTrace();
}
}
}
2,工具类
package com.zl.exportword;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
/**
* poi操作word工具类
* @author lei
* @version 1.0
* @date 2022/11/14 10:23
*/
public class WordUtil {
/**
* 导出富文本内容到word
* @param request
* @param response
* @param content 输出内容
* @param fileName 导出文件名称
* @throws Exception
*/
public static void exportHtmlToWord(HttpServletRequest request, HttpServletResponse response, String content, String fileName) throws Exception {
//图片转为base64方法
//String imagebase64 = getImageStr(imagePath);
// 拼接html格式内容
StringBuffer sbf = new StringBuffer();
// 这里拼接一下html标签,便于word文档能够识别
sbf.append("<html " +
"xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"" + //将版式从web版式改成页面试图
">");
sbf.append("<head>" +
"<!--[if gte mso 9]><xml><w:WordDocument><w:View>Print</w:View><w:TrackMoves>false</w:TrackMoves><w:TrackFormatting/><w:ValidateAgainstSchemas/><w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid><w:IgnoreMixedContent>false</w:IgnoreMixedContent><w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText><w:DoNotPromoteQF/><w:LidThemeOther>EN-US</w:LidThemeOther><w:LidThemeAsian>ZH-CN</w:LidThemeAsian><w:LidThemeComplexScript>X-NONE</w:LidThemeComplexScript><w:Compatibility><w:BreakWrappedTables/><w:SnapToGridInCell/><w:WrapTextWithPunct/><w:UseAsianBreakRules/><w:DontGrowAutofit/><w:SplitPgBreakAndParaMark/><w:DontVertAlignCellWithSp/><w:DontBreakConstrainedForcedTables/><w:DontVertAlignInTxbx/><w:Word11KerningPairs/><w:CachedColBalance/><w:UseFELayout/></w:Compatibility><w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel><m:mathPr><m:mathFont m:val="Cambria Math"/><m:brkBin m:val="before"/><m:brkBinSub m:val="--"/><m:smallFrac m:val="off"/><m:dispDef/><m:lMargin m:val="0"/> <m:rMargin m:val="0"/><m:defJc m:val="centerGroup"/><m:wrapIndent m:val="1440"/><m:intLim m:val="subSup"/><m:naryLim m:val="undOvr"/></m:mathPr></w:WordDocument></xml><![endif]-->" +
"</head>");
sbf.append("<body>");
// 富文本内容
sbf.append(content);
sbf.append("</body></html>");
// 必须要设置编码,避免中文就会乱码
byte[] b = sbf.toString().getBytes("GBK");
// 将字节数组包装到流中
ByteArrayInputStream bais = new ByteArrayInputStream(b);
POIFSFileSystem poifs = new POIFSFileSystem();
DirectoryEntry directory = poifs.getRoot();
// 这代码不能省略,否则导出乱码。
DocumentEntry documentEntry = directory.createDocument("WordDocument", bais);
//输出文件
request.setCharacterEncoding("utf-8");
// 导出word格式
response.setContentType("application/msword");
response.addHeader("Content-Disposition", "attachment;filename=" +
new String(fileName.getBytes("GB2312"),"iso8859-1") + ".doc");
ServletOutputStream ostream = response.getOutputStream();
poifs.writeFilesystem(ostream);
bais.close();
ostream.close();
}
/**
* 富文本内容到word(本地)
* @param content 输出内容
* @param fileName 导出文件名称
* @throws Exception
*/
public static void exportHtmlToWord(String filepath, String content, String fileName) throws Exception {
// 拼接html格式内容
StringBuffer sbf = new StringBuffer();
// 这里拼接一下html标签,便于word文档能够识别
sbf.append("<html " +
"xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"" + //将版式从web版式改成页面试图
">");
sbf.append("<head>" +
"<!--[if gte mso 9]><xml><w:WordDocument><w:View>Print</w:View><w:TrackMoves>false</w:TrackMoves><w:TrackFormatting/><w:ValidateAgainstSchemas/><w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid><w:IgnoreMixedContent>false</w:IgnoreMixedContent><w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText><w:DoNotPromoteQF/><w:LidThemeOther>EN-US</w:LidThemeOther><w:LidThemeAsian>ZH-CN</w:LidThemeAsian><w:LidThemeComplexScript>X-NONE</w:LidThemeComplexScript><w:Compatibility><w:BreakWrappedTables/><w:SnapToGridInCell/><w:WrapTextWithPunct/><w:UseAsianBreakRules/><w:DontGrowAutofit/><w:SplitPgBreakAndParaMark/><w:DontVertAlignCellWithSp/><w:DontBreakConstrainedForcedTables/><w:DontVertAlignInTxbx/><w:Word11KerningPairs/><w:CachedColBalance/><w:UseFELayout/></w:Compatibility><w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel><m:mathPr><m:mathFont m:val="Cambria Math"/><m:brkBin m:val="before"/><m:brkBinSub m:val="--"/><m:smallFrac m:val="off"/><m:dispDef/><m:lMargin m:val="0"/> <m:rMargin m:val="0"/><m:defJc m:val="centerGroup"/><m:wrapIndent m:val="1440"/><m:intLim m:val="subSup"/><m:naryLim m:val="undOvr"/></m:mathPr></w:WordDocument></xml><![endif]-->" +
"</head>");
sbf.append("<body>");
// 富文本内容
sbf.append(content);
sbf.append("</body></html>");
// 必须要设置编码,避免中文就会乱码
byte[] b = sbf.toString().getBytes("GBK");
// 将字节数组包装到流中
ByteArrayInputStream bais = new ByteArrayInputStream(b);
POIFSFileSystem poifs = new POIFSFileSystem();
DirectoryEntry directory = poifs.getRoot();
// 这代码不能省略,否则导出乱码。
DocumentEntry documentEntry = directory.createDocument("WordDocument", bais);
FileOutputStream out = new FileOutputStream(new File(filepath+fileName));
poifs.writeFilesystem(out);
bais.close();
out.close();
}
}
package com.zl.exportword;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Base64;
import java.util.Locale;
import javax.imageio.ImageIO;
import sun.misc.BASE64Decoder;
import sun.misc.BASE64Encoder;
/**
* 图片处理工具类
* @author lei
* @date 2022/11/14 10:20
* @version 1.0
*/
/**
 * Image helpers: resizing, PNG encoding/decoding and Base64 conversion.
 *
 * <p>Base64 conversion uses {@link java.util.Base64} instead of the internal
 * {@code sun.misc} classes.
 *
 * @author lei
 * @date 2022/11/14 10:20
 * @version 1.0
 */
public class ImageUtils {

    /** Images read by {@link #getImageStr(String)} are scaled down to this width when wider. */
    private static final int FILE_IMAGE_MAX_WIDTH = 500;

    /**
     * Scales an image to the given size.
     *
     * @param originalImage image to scale
     * @param targetWidth   width of the result in pixels
     * @param targetHeight  height of the result in pixels
     * @return a new {@code TYPE_INT_RGB} image of the requested size
     * @throws IOException declared for interface compatibility
     */
    public static BufferedImage resizeImage(BufferedImage originalImage, int targetWidth, int targetHeight) throws IOException {
        Image resultingImage = originalImage.getScaledInstance(targetWidth, targetHeight, Image.SCALE_AREA_AVERAGING);
        BufferedImage outputImage = new BufferedImage(targetWidth, targetHeight, BufferedImage.TYPE_INT_RGB);
        Graphics graphics = outputImage.getGraphics();
        try {
            graphics.drawImage(resultingImage, 0, 0, null);
        } finally {
            // Release the graphics context instead of waiting for garbage collection.
            graphics.dispose();
        }
        return outputImage;
    }

    /**
     * Encodes bytes as Base64 text.
     *
     * @param data bytes to encode
     * @return Base64 text without line breaks, suitable for a data URI
     */
    public static String imageToBase64(byte[] data) {
        return Base64.getEncoder().encodeToString(data);
    }

    /**
     * Decodes Base64 text to bytes.
     *
     * <p>The MIME decoder is used so that line breaks and other characters outside
     * the Base64 alphabet are skipped rather than rejected.
     *
     * @param base64 Base64 text
     * @return decoded bytes
     * @throws IOException if the text is not valid Base64
     */
    public static byte[] base64ToByte(String base64) throws IOException {
        try {
            return Base64.getMimeDecoder().decode(base64);
        } catch (IllegalArgumentException e) {
            throw new IOException("Invalid Base64 input", e);
        }
    }

    /**
     * Encodes an image as PNG bytes.
     *
     * @param bImage image to encode
     * @return PNG bytes; if writing fails the stack trace is printed and the bytes
     *         written so far are returned
     */
    public static byte[] imageToBytes(BufferedImage bImage) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            ImageIO.write(bImage, "png", out);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return out.toByteArray();
    }

    /**
     * Decodes image bytes.
     *
     * @param ImageByte encoded image bytes; may be {@code null}
     * @return the decoded image, or {@code null} when the input is {@code null}, is not
     *         a format {@link ImageIO} can read, or reading fails
     */
    public static BufferedImage bytesToBufferedImage(byte[] ImageByte) {
        if (ImageByte == null) {
            return null;
        }
        try {
            return ImageIO.read(new ByteArrayInputStream(ImageByte));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads an image file and returns it as a Base64 data URI.
     *
     * <p>Images wider than {@value #FILE_IMAGE_MAX_WIDTH} pixels are scaled down
     * proportionally and re-encoded as PNG. Other files are embedded unchanged, with
     * the media type derived from the file extension.
     *
     * @param imgPath path of the image file
     * @return a {@code data:image/...;base64,} URI, or {@code null} when the file does
     *         not exist, is empty or cannot be read
     * @throws IOException declared for interface compatibility; read failures are
     *                     printed and reported as {@code null}
     */
    public static String getImageStr(String imgPath) throws IOException {
        File file = new File(imgPath);
        if (!file.exists()) {
            return null;
        }
        try {
            byte[] data = readFully(file);
            if (data.length == 0) {
                return null;
            }
            String imageType = imageTypeOf(file.getName());

            BufferedImage bufferedImage = bytesToBufferedImage(data);
            if (bufferedImage != null && bufferedImage.getWidth() > FILE_IMAGE_MAX_WIDTH) {
                // Scale the height by the same factor as the width; keep it at least 1 pixel.
                int height = Math.max(1,
                        (int) (bufferedImage.getHeight() * (double) FILE_IMAGE_MAX_WIDTH / bufferedImage.getWidth()));
                data = imageToBytes(resizeImage(bufferedImage, FILE_IMAGE_MAX_WIDTH, height));
                // imageToBytes always writes PNG, so the label must follow.
                imageType = "png";
            }
            return "data:image/" + imageType + ";base64," + imageToBase64(data);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Reads the whole file; a single read() call is not guaranteed to fill the buffer. */
    private static byte[] readFully(File file) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try (FileInputStream in = new FileInputStream(file)) {
            byte[] chunk = new byte[8192];
            int read;
            while ((read = in.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
        }
        return buffer.toByteArray();
    }

    /** Maps a file name to an image subtype: the lower-cased extension, "jpg" as "jpeg", "png" when absent. */
    private static String imageTypeOf(String fileName) {
        int dot = fileName.lastIndexOf('.');
        if (dot < 0 || dot == fileName.length() - 1) {
            return "png";
        }
        String extension = fileName.substring(dot + 1).toLowerCase(Locale.ROOT);
        return "jpg".equals(extension) ? "jpeg" : extension;
    }
}