// Converting Word documents to HTML and images
package com.certusnet.portlet.freeterms.utils;
import java.awt.Image;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.RandomAccessFile;
import java.net.ConnectException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.imageio.ImageIO;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;
import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;
import com.certusnet.portal.kernel.configuration.Configuration; import com.certusnet.portal.kernel.exception.PortalException; import com.certusnet.portal.kernel.util.PropsKeys;
import com.certusnet.portal.kernel.util.StringPool;
import com.certusnet.portlet.configuration.PortletConfigurationServiceUtil;
import com.sun.image.codec.jpeg.JPEGCodec;
import com.sun.image.codec.jpeg.JPEGImageEncoder;
import com.sun.pdfview.PDFFile;
import com.sun.pdfview.PDFPage;
/**
* 将Word文档转换成html字符串的工具类
*
* @author MZULE
*
*/
public class ConverterUtil {
private static final String _BMS_FREE_TERMS_MANAGE_PORTLET_NAME = "BMS-FreeTermsManage-portlet";
private static final String _EXCEPTION_CONVERSION_FAILURE = "conversion_failure";
private static final String _IMG_SRC_TAG = "
private static String _SOFTWARE_UPLOAD_PATH = null;
private static Process pro = null;
/**
 * Builds the upload directory path for a version.
 *
 * <p>The upload root is read from the portlet configuration key
 * {@code tms.apk.upload.path} the first time it is needed (or whenever the
 * cached value is null or blank) and kept in a static field afterwards.
 *
 * @param versionid version identifier appended to the upload root
 * @return the upload root followed by {@code versionid} and a forward slash
 */
public static String getFileUploadPath(Long versionid) {
    boolean rootMissing = _SOFTWARE_UPLOAD_PATH == null
            || StringPool.BLANK.equals(_SOFTWARE_UPLOAD_PATH);
    if (rootMissing) {
        ClassLoader loader = PortletConfigurationServiceUtil.class.getClassLoader();
        Configuration portletConfig = PortletConfigurationServiceUtil.getConfiguration(
                loader, _BMS_FREE_TERMS_MANAGE_PORTLET_NAME);
        _SOFTWARE_UPLOAD_PATH = portletConfig.get("tms.apk.upload.path");
    }
    String versionDir = _SOFTWARE_UPLOAD_PATH + versionid + StringPool.FORWARD_SLASH;
    return versionDir;
}
/**
 * Copies a file into a target directory under the given name.
 *
 * <p>The target directory is created when it does not exist, and an existing
 * target file is overwritten.
 *
 * @param path     directory to copy into
 * @param fileName name of the copy inside {@code path}
 * @param file     source file to read
 * @return absolute path of the written copy
 * @throws PortalException if the copy cannot be created, read or written,
 *                         or if either stream fails to close
 */
public static String uploadFile(String path, String fileName, File file)
        throws PortalException {
    File targetDir = new File(path);
    if (!targetDir.exists()) {
        targetDir.mkdirs();
    }
    File target = new File(path + StringPool.FORWARD_SLASH + fileName);
    InputStream from = null;
    FileOutputStream to = null;
    try {
        if (!target.exists()) {
            target.createNewFile();
        }
        from = new FileInputStream(file);
        to = new FileOutputStream(target);
        byte[] buffer = new byte[4096];
        int bytesRead;
        while ((bytesRead = from.read(buffer)) != -1) {
            to.write(buffer, 0, bytesRead);
        }
    } catch (Exception e) {
        throw new PortalException();
    } finally {
        // Close the streams independently so that a failure closing the
        // input can no longer leave the output stream open.
        boolean closeFailed = false;
        if (from != null) {
            try {
                from.close();
            } catch (IOException e) {
                closeFailed = true;
            }
        }
        if (to != null) {
            try {
                to.close();
            } catch (IOException e) {
                closeFailed = true;
            }
        }
        if (closeFailed) {
            throw new PortalException();
        }
    }
    return target.getAbsolutePath();
}
/**
 * Converts a document to PDF through an OpenOffice service on port 8100.
 *
 * <p>The service is launched with {@code startService()}, the conversion is
 * delegated to a JODConverter {@code OpenOfficeDocumentConverter}, and the
 * connection and the service are released afterwards whether or not the
 * conversion succeeded.
 *
 * <p>NOTE(review): the connection is attempted immediately after the service
 * is launched; presumably the service needs some time before it accepts
 * connections - confirm whether a wait/retry is required.
 *
 * @param docFile  document to convert
 * @param filepath directory in which the PDF is created; the file is named
 *                 after the current time in milliseconds
 * @return the generated PDF file
 * @throws PortalException with message {@code conversion_failure} when any step fails
 */
public static File docToPdf(File docFile, String filepath) throws PortalException {
    try {
        startService();
        try {
            File pdfFile = new File(filepath + StringPool.FORWARD_SLASH
                    + new Date().getTime() + ".pdf");
            OpenOfficeConnection con = new SocketOpenOfficeConnection(8100);
            try {
                con.connect();
                DocumentConverter converter = new OpenOfficeDocumentConverter(con);
                converter.convert(docFile, pdfFile);
            } finally {
                // Only disconnect an established connection; this also
                // releases it when connect() or convert() failed part-way.
                if (con.isConnected()) {
                    con.disconnect();
                }
            }
            return pdfFile;
        } finally {
            // Previously skipped on failure, leaving the launched process running.
            stopService();
        }
    } catch (Exception e) {
        throw new PortalException(_EXCEPTION_CONVERSION_FAILURE);
    }
}
/**
 * Renders every page of a PDF to a JPEG file named {@code <page>.jpg}
 * (page numbers start at 1) inside {@code pictureDir}.
 *
 * <p>Original author's note: the PDF, the picture directory and the document
 * path are kept in the same directory, which is created from the current
 * version number.
 *
 * @param pdfFile    PDF file to render
 * @param pictureDir directory receiving the page images,
 *                   e.g. {@code D://test//picturehd//}
 * @return number of pages in the PDF
 * @throws PortalException with message {@code conversion_failure} when the PDF
 *                         cannot be read or a page cannot be written
 */
public static int pdfToPicture(File pdfFile, String pictureDir) throws PortalException {
    RandomAccessFile raf = null;
    try {
        raf = new RandomAccessFile(pdfFile, "r");
        FileChannel channel = raf.getChannel();
        ByteBuffer buf = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
        PDFFile pdffile = new PDFFile(buf);
        int pageCount = pdffile.getNumPages();
        for (int i = 1; i <= pageCount; i++) {
            PDFPage page = pdffile.getPage(i);
            Rectangle rect = new Rectangle(0, 0,
                    (int) page.getBBox().getWidth(), (int) page.getBBox().getHeight());
            Image img = page.getImage(rect.width, rect.height, rect, null, true, true);
            BufferedImage tag = new BufferedImage(rect.width, rect.height,
                    BufferedImage.TYPE_INT_RGB);
            tag.getGraphics().drawImage(img, 0, 0, rect.width, rect.height, null);
            FileOutputStream out = new FileOutputStream(
                    pictureDir + StringPool.FORWARD_SLASH + i + ".jpg");
            try {
                JPEGImageEncoder encoder = JPEGCodec.createJPEGEncoder(out);
                encoder.encode(tag);
            } finally {
                // Close the page file even when encoding fails.
                out.close();
            }
        }
        return pageCount;
    } catch (Exception e) {
        throw new PortalException(_EXCEPTION_CONVERSION_FAILURE);
    } finally {
        // The file handle was previously never released.
        if (raf != null) {
            try {
                raf.close();
            } catch (IOException ignored) {
                // The file was opened read-only and all pages are already
                // processed, so a close failure cannot affect the output.
            }
        }
    }
}
/**
 * Scales an image down to fit the given bounds and stores it as a JPEG.
 *
 * <p>Images that already fit within {@code width} x {@code height} keep their
 * size. Larger images are either scaled proportionally so that both sides
 * fit ({@code proportion == true}) or forced to exactly
 * {@code width} x {@code height}.
 *
 * @param file              image file to read
 * @param directoryFileName path of the JPEG file to write
 * @param width             maximum (or forced) width in pixels, must be positive
 * @param height            maximum (or forced) height in pixels, must be positive
 * @param proportion        whether to keep the aspect ratio when shrinking
 * @return the written file, or {@code null} when {@code file} or
 *         {@code directoryFileName} is null or the image width is unknown
 * @throws PortalException if the image cannot be read, scaled or written
 */
public static File compressImage(File file, String directoryFileName,
        int width, int height, boolean proportion) throws PortalException {
    // Check before building the File: new File(null) would throw instead of
    // reaching the null return.
    if (file == null || directoryFileName == null) {
        return null;
    }
    File target = new File(directoryFileName);
    FileOutputStream fileOutputStream = null;
    try {
        if (width <= 0 || height <= 0) {
            throw new IllegalArgumentException("width and height must be positive");
        }
        Image image = ImageIO.read(file);
        if (image == null) {
            throw new IOException("unsupported image: " + file);
        }
        int srcWidth = image.getWidth(null);
        int srcHeight = image.getHeight(null);
        if (srcWidth == -1) {
            return null;
        }
        int newWidth = srcWidth;
        int newHeight = srcHeight;
        if (srcWidth > width || srcHeight > height) {
            if (proportion) {
                // Floating-point ratio: integer division truncated e.g. 1.5
                // to 1 and left the image larger than the requested bounds.
                double rate = Math.max((double) srcWidth / width,
                        (double) srcHeight / height);
                newWidth = Math.min(width, Math.max(1, (int) Math.round(srcWidth / rate)));
                newHeight = Math.min(height, Math.max(1, (int) Math.round(srcHeight / rate)));
            } else {
                newWidth = width;
                newHeight = height;
            }
        }
        BufferedImage bufferedImage = new BufferedImage(newWidth,
                newHeight, BufferedImage.TYPE_INT_RGB);
        bufferedImage.getGraphics().drawImage(
                image.getScaledInstance(newWidth, newHeight, Image.SCALE_SMOOTH),
                0, 0, null);
        // Open the target only once the source is fully decoded so a failed
        // read does not leave behind (or truncate) the output file.
        fileOutputStream = new FileOutputStream(target);
        JPEGImageEncoder encoder = JPEGCodec.createJPEGEncoder(fileOutputStream);
        encoder.encode(bufferedImage);
    } catch (Exception e) {
        throw new PortalException();
    } finally {
        if (fileOutputStream != null) {
            try {
                fileOutputStream.close();
            } catch (Exception e) {
                throw new PortalException(_EXCEPTION_CONVERSION_FAILURE);
            }
        }
    }
    return target;
}
/**
* 将word文档转换成html文档
*
* @param docFile
* 需要转换的word文档
* @param filepath
* 转换之后html的存放路径
* @return 转换之后的html文件
* @throws IOException
* @throws PortalException
*/
public static String docToHtml(File docFile, String filepath) throws TransformerException, IOException, ParserConfigurationException, PortalException {
filepath = filepath + StringPool.FORWARD_SLASH;
String htmlFileName = new Date().getTime()+ ".html";
String htmlFilePath=filepath + htmlFileName;
InputStream input = new FileInputStream(docFile);
HWPFDocument wordDocument = new HWPFDocument(input);
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocum entBuilder().newDocument());
wordToHtmlConverter.setPicturesManager(new PicturesManager() { public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
float widthInches,float heightInches) {
return suggestedName;
}
});
wordToHtmlConverter.processDocument(wordDocument);
List pics = wordDocument.getPicturesTable().getAllPictures(); if (pics != null) {
for (int i = 0; i < pics.size(); i++) {
Picture pic = (Picture) pics.get(i);
try {
pic.writeImageContent(new
FileOutputStream(filepath + pic.suggestFullFileName()));
} catch (FileNotFoundException e) {
throw new PortalException(_EXCEPTION_CONVERSION_FAILURE);
}
}
}
Document htmlDocument = wordToHtmlConverter.getDocument();
ByteArrayOutputStream outStream = new ByteArrayOutputStream(); DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(outStream);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
outStream.close();
String content = new String(outStream.toByteArray());
OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(htmlFilePath), "UTF-8");
osw.write(content);
osw.flush();
osw.close();
//这是源文件
File htmlFile = new File(htmlFilePath);
String htmlMoveFilePath = "";
try {
htmlMoveFilePath = uploadHtmlFile(htmlFile.getName(), htmlFile);
} catch (PortalException e) {
throw new PortalException(_EXCEPTION_CONVERSION_FAILURE);
}
//FileUtils.write(new File(filepath, htmlFileName), content, "utf-8");
return htmlMoveFilePath;
}
/**
 * Copies an HTML file into the portal's web directory.
 *
 * <p>The target directory is {@code <env.DOMAIN_HOME>/portal/ROOT} followed by
 * the value of the portlet configuration key {@code free.terms.html.path}
 * (for example {@code /upload-apk/htmlfile}); it is created when missing.
 *
 * @param fileName name of the copy inside the target directory
 * @param file     source file to read
 * @return absolute path of the written copy
 * @throws PortalException if the copy cannot be created, read or written,
 *                         or if either stream fails to close
 */
public static String uploadHtmlFile(String fileName, File file) throws PortalException {
    Configuration config = PortletConfigurationServiceUtil
            .getConfiguration(
                    PortletConfigurationServiceUtil.class.getClassLoader(),
                    _BMS_FREE_TERMS_MANAGE_PORTLET_NAME);
    String htmpservicepath = config.get("free.terms.html.path");
    // Root of the portal web application on the server.
    String portalWebDir = System.getProperty("env.DOMAIN_HOME") + "/portal/ROOT";
    // Directory the file is copied into.
    htmpservicepath = portalWebDir + htmpservicepath;
    File htmlServiceDir = new File(htmpservicepath);
    if (!htmlServiceDir.exists()) {
        htmlServiceDir.mkdirs();
    }
    File htmlServiceFile = new File(htmpservicepath + StringPool.FORWARD_SLASH + fileName);
    InputStream from = null;
    FileOutputStream to = null;
    try {
        if (!htmlServiceFile.exists()) {
            htmlServiceFile.createNewFile();
        }
        from = new FileInputStream(file);
        to = new FileOutputStream(htmlServiceFile.getPath());
        byte[] buffer = new byte[4096];
        int bytesRead;
        while ((bytesRead = from.read(buffer)) != -1) {
            to.write(buffer, 0, bytesRead);
        }
    } catch (Exception e) {
        throw new PortalException();
    } finally {
        // Close the streams independently so that a failure closing the
        // input can no longer leave the output stream open.
        boolean closeFailed = false;
        if (from != null) {
            try {
                from.close();
            } catch (IOException e) {
                closeFailed = true;
            }
        }
        if (to != null) {
            try {
                to.close();
            } catch (IOException e) {
                closeFailed = true;
            }
        }
        if (closeFailed) {
            throw new PortalException();
        }
    }
    return htmlServiceFile.getAbsolutePath();
}
public static void startService() throws PortalException{ // 启动OpenOffice的服务 -nofirststartwizard &
String command = "/opt/openoffice4/program/soffice -headless
-accept=\"socket,host=127.0.0.1,port=8100;urp;StarOffice.Service\ " -nofirststartwizard &";
// connect to an https://www.360docs.net/doc/7a3205385.html, instance running on port 8100
try{
pro = Runtime.getRuntime().exec(command);
}catch(Exception ex){
throw new PortalException("conversion_service_unstarted") ;
}
}
/**
 * Destroys the process recorded in {@code pro}; does nothing when no
 * process has been recorded.
 */
public static void stopService() {
    if (pro == null) {
        return;
    }
    pro.destroy();
}
/**
* 清除一些不需要的html标记
*
* @param htmlStr
* 带有复杂html标记的html语句
* @return 去除了不需要html标记的语句
*/
protected static String clearFormat(String htmlStr, String docImgPath) {
// 获取body内容的正则
String bodyReg = "
";Pattern bodyPattern = https://www.360docs.net/doc/7a3205385.html,pile(bodyReg);
Matcher bodyMatcher = bodyPattern.matcher(htmlStr);
if (bodyMatcher.find()) {
// 获取BODY内容,并转化BODY标签为DIV
htmlStr = bodyMatcher.group().replaceFirst("
", "}
// 调整图片地址
htmlStr = htmlStr.replaceAll(_IMG_SRC_TAG, _IMG_SRC_TAG + docImgPath
+ StringPool.FORWARD_SLASH);
// 把
转换成