Java 读取Word文本框中的文本、图片、表格

合集下载

Java 读取Word中的文本和图片

Java 提取Word中的文本和图片本文将介绍通过Java来提取或读取Word文档中文本和图片的方法。

这里提取文本和图片包括同时提取文档正文当中以及页眉、页脚中的文本和图片。

使用工具：Spire.Doc for JavaJar文件导入方法（参考）：方法1：下载jar文件包。

下载后解压文件，并将lib文件夹下的Spire.Doc.jar文件导入到java程序。

导入效果参考如下：方法2：可通过maven导入。

参考导入方法。

测试文档如下：Java 代码示例（供参考）【示例1】提取Word 中的文本 import com.spire.doc.*; import java.io.FileWriter;import java.io.IOException;public class ExtractText {public static void main(String[] args) throws IOException{//加载测试文档Document doc = new Document();doc.loadFromFile("test.docx");//获取文本保存为StringString text = doc.getText();//将String写入TxtwriteStringToTxt(text,"提取文本.txt");}public static void writeStringToTxt(String content, String txtFileName) throws IOException {FileWriter fWriter= new FileWriter(txtFileName,true);try {fWriter.write(content);}catch(IOException ex){ex.printStackTrace();}finally{try{fWriter.flush();fWriter.close();} catch (IOException ex) {ex.printStackTrace();}}}}文本提取结果：【示例2】提取Word中的图片import com.spire.doc.Document;import com.spire.doc.documents.DocumentObjectType;import com.spire.doc.fields.DocPicture;import com.spire.doc.interfaces.ICompositeObject;import com.spire.doc.interfaces.IDocumentObject;import javax.imageio.ImageIO;import java.awt.image.RenderedImage;import java.io.File;import java.io.IOException;import java.util.ArrayList;import java.util.LinkedList;import java.util.List;import java.util.Queue;public class ExtractImg {public static void main(String[] args) throws IOException { //加载Word文档Document document = new Document();document.loadFromFile("test.docx");//创建Queue对象Queue nodes = new LinkedList();nodes.add(document);//创建List对象List images = new ArrayList();//遍历文档中的子对象while (nodes.size() > 0) {ICompositeObject node = (ICompositeObject) nodes.poll();for (int i = 0; i < node.getChildObjects().getCount(); i++) {IDocumentObject child = node.getChildObjects().get(i);if (child instanceof ICompositeObject) {nodes.add((ICompositeObject) child);//获取图片并添加到Listif (child.getDocumentObjectType() == DocumentObjectType.Picture) { DocPicture picture = (DocPicture) child;images.add(picture.getImage());}}}}//将图片保存为PNG格式文件for (int i = 0; i < images.size(); i++) {File file = new File(String.format("图片-%d.png", i));ImageIO.write((RenderedImage) images.get(i), "PNG", 
file);}}}图片提取结果：（本文完）。

JavaPOI操作word文档内容、表格

JavaPOI操作word⽂档内容、表格⼀、pom<dependency><groupId>org.apache.poi</groupId><artifactId>poi</artifactId><version>4.0.0</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>poi-scratchpad</artifactId><version>4.0.0</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml</artifactId><version>4.0.0</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml-schemas</artifactId><version>4.0.0</version></dependency>⼆、直接上代码word模板中${content} 注意我只有在.docx⽤XWPFDocument才有效2.1/*** 获取document**/XWPFDocument document = null;try {document = new XWPFDocument(inputStream);} catch (IOException ioException) {ioException.printStackTrace();}/*** 替换段落⾥⾯的变量** @param doc 要替换的⽂档* @param params 参数*/private void replaceInPara(XWPFDocument doc, Map<String, String> params) {for (XWPFParagraph para : doc.getParagraphs()) {replaceInPara(para, params);}}/*** 替换段落⾥⾯的变量** @param para 要替换的段落* @param params 参数*/private void replaceInPara(XWPFParagraph para, Map<String, String> params) {List<XWPFRun> runs;Matcher matcher;replaceText(para);//如果para拆分的不对，则⽤这个⽅法修改成正确的if (matcher(para.getParagraphText()).find()) {runs = para.getRuns();for (int i = 0; i < runs.size(); i++) {XWPFRun run = runs.get(i);String runText = run.toString();matcher = matcher(runText);if (matcher.find()) {while ((matcher = matcher(runText)).find()) {runText = matcher.replaceFirst(String.valueOf(params.get(matcher.group(1))));}//直接调⽤XWPFRun的setText()⽅法设置⽂本时，在底层会重新创建⼀个XWPFRun，把⽂本附加在当前⽂本后⾯， para.removeRun(i);para.insertNewRun(i).setText(runText);}}}}/*** 替换⽂本内容* @param para* @return*/private List<XWPFRun> replaceText(XWPFParagraph para) {List<XWPFRun> runs = para.getRuns();String str = "";boolean flag = false;for (int i = 0; i < runs.size(); i++) {XWPFRun run = runs.get(i);String runText = run.toString();if (flag || runText.equals("${")) {str = str + runText;flag = true;para.removeRun(i);if (runText.equals("}")) {flag = 
false;para.insertNewRun(i).setText(str);str = "";}i--;}}return runs;}2.22.2.1XWPFTable table = document.getTableArray(0);//获取当前表格XWPFTableRow twoRow = table.getRow(2);//获取某⼀⾏XWPFTableRow nextRow = table.insertNewTableRow(3);//插⼊⼀⾏XWPFTableCell firstRowCellOne = firstRow.getCell(0);firstRowCellOne.removeParagraph(0);//删除默认段落，要不然表格内第⼀条为空⾏XWPFParagraph pIO2 =firstRowCellOne.addParagraph();XWPFRun rIO2 = pIO2.createRun();rIO2.setFontFamily("宋体");//字体rIO2.setFontSize(8);//字体⼤⼩rIO2.setBold(true);//是否加粗rIO2.setColor("FF0000");//字体颜⾊rIO2.setText("这是写⼊的内容");//rIO2.addBreak(BreakType.TEXT_WRAPPING);//软换⾏，亲测有效/*** 复制单元格和样式** @param targetRow 要复制的⾏* @param sourceRow 被复制的⾏*/public void createCellsAndCopyStyles(XWPFTableRow targetRow, XWPFTableRow sourceRow) {targetRow.getCtRow().setTrPr(sourceRow.getCtRow().getTrPr());List<XWPFTableCell> tableCells = sourceRow.getTableCells();if (CollectionUtils.isEmpty(tableCells)) {return;}for (XWPFTableCell sourceCell : tableCells) {XWPFTableCell newCell = targetRow.addNewTableCell();newCell.getCTTc().setTcPr(sourceCell.getCTTc().getTcPr());List sourceParagraphs = sourceCell.getParagraphs();if (CollectionUtils.isEmpty(sourceParagraphs)) {continue;}XWPFParagraph sourceParagraph = (XWPFParagraph) sourceParagraphs.get(0);List targetParagraphs = newCell.getParagraphs();if (CollectionUtils.isEmpty(targetParagraphs)) {XWPFParagraph p = newCell.addParagraph();p.getCTP().setPPr(sourceParagraph.getCTP().getPPr());XWPFRun run = p.getRuns().isEmpty() ? p.createRun() : p.getRuns().get(0);run.setFontFamily(sourceParagraph.getRuns().get(0).getFontFamily());} else {XWPFParagraph p = (XWPFParagraph) targetParagraphs.get(0);p.getCTP().setPPr(sourceParagraph.getCTP().getPPr());XWPFRun run = p.getRuns().isEmpty() ? 
p.createRun() : p.getRuns().get(0);if (sourceParagraph.getRuns().size() > 0) {run.setFontFamily(sourceParagraph.getRuns().get(0).getFontFamily());}}}}#### 2.2.3/*** 合并单元格** @param table 表格对象* @param beginRowIndex 开始⾏索引* @param endRowIndex 结束⾏索引* @param colIndex 合并列索引*/public void mergeCell(XWPFTable table, int beginRowIndex, int endRowIndex, int colIndex) { if (beginRowIndex == endRowIndex || beginRowIndex > endRowIndex) {return;}//合并⾏单元格的第⼀个单元格CTVMerge startMerge = CTVMerge.Factory.newInstance();startMerge.setVal(STMerge.RESTART);//合并⾏单元格的第⼀个单元格之后的单元格CTVMerge endMerge = CTVMerge.Factory.newInstance();endMerge.setVal(STMerge.CONTINUE);table.getRow(beginRowIndex).getCell(colIndex).getCTTc().getTcPr().setVMerge(startMerge); for (int i = beginRowIndex + 1; i <= endRowIndex; i++) {table.getRow(i).getCell(colIndex).getCTTc().getTcPr().setVMerge(endMerge);}}/*** insertRow 在word表格中指定位置插⼊⼀⾏，并将某⼀⾏的样式复制到新增⾏* @param copyrowIndex 需要复制的⾏位置* @param newrowIndex 需要新增⼀⾏的位置* */public static void insertRow(XWPFTable table, int copyrowIndex, int newrowIndex) {// 在表格中指定的位置新增⼀⾏XWPFTableRow targetRow = table.insertNewTableRow(newrowIndex);// 获取需要复制⾏对象XWPFTableRow copyRow = table.getRow(copyrowIndex);//复制⾏对象targetRow.getCtRow().setTrPr(copyRow.getCtRow().getTrPr());//或许需要复制的⾏的列List<XWPFTableCell> copyCells = copyRow.getTableCells();//复制列对象XWPFTableCell targetCell = null;for (int i = 0; i < copyCells.size(); i++) {XWPFTableCell copyCell = copyCells.get(i);targetCell = targetRow.addNewTableCell();targetCell.getCTTc().setTcPr(copyCell.getCTTc().getTcPr());if (copyCell.getParagraphs() != null && copyCell.getParagraphs().size() > 0) {targetCell.getParagraphs().get(0).getCTP().setPPr(copyCell.getParagraphs().get(0).getCTP().getPPr()); if (copyCell.getParagraphs().get(0).getRuns() != null&& copyCell.getParagraphs().get(0).getRuns().size() > 0) {XWPFRun cellR = targetCell.getParagraphs().get(0).createRun();cellR.setBold(copyCell.getParagraphs().get(0).getRuns().get(0).isBold());}}}}/*** 正则匹配字符串** 
@param str* @return*/private Matcher matcher(String str) {Pattern pattern = pile("\\$\\{(.+?)\\}", Pattern.CASE_INSENSITIVE);Matcher matcher = pattern.matcher(str);return matcher;}。

java中读取word文档里的内容

import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import ermodel.XWPFDocument; import ermodel.XWPFParagraph; import ermodel.XWPFPictureData; import ermodel.XWPFRun; import ermodel.XWPFTable; import ermodel.XWPFTableCell; import ermodel.XWPFTableRow;
Iterator<XWPFTable> itTable = document.getTablesIterator(); int ind = 0; while (itTable.hasNext()){
ind++; XWPFTable table = (XWPFTable) itTable.next(); //行 int rcount = table.getNumberOfRows(); for (int i = 0; i < rcount; i++){
// OPCPt.openPackage(srcPath); // XWPFDocument doc = new XWPFDocument(pack); return xdoc; } catch (IOException e) { System.out.println("读取文件出错！"); e.printStackTrace(); return null; } } return null; }
}
//pom.xml文件
<project xmlns="/POM/4.0.0" xmlns:xsi="/2001/XMLSchema-instance" xsi:schemaLocation="/POM/4.0.0 /xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId></groupId> <artifactId>excelReadAndWrite</artifactId> <version>0.0.1-SNAPSHOT</version> <dependencies> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>3.8.1</version> <scope>test</scope> </dependency> <dependency> <groupId>org.apache.directory.studio</groupId> <artifactId>mons.codec</artifactId> <version>1.8</version> </dependency> <dependency> <groupId>net.sourceforge.jexcelapi</groupId> <artifactId>jxl</artifactId> <version>2.6.12</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.9</version> </dependency> </dependencies>

java读取word并解析

java读取word并解析java POI3.8处理word模板,⽂字图⽚表格将word模板⾥⾯的特殊标签换成⽂字,图⽚,以下是处理的代码特殊标签最好的复制粘贴到word模板⾥⾯ ,因为⼿动敲⼊可能有点⼩的差别都导致这个标签不是⼀⼩块(chunk)这样会⽆法识别,⽂字样式设置的时候也最好选择特殊标签整体进⾏设置,尽量不要多选(例如标签后⾯跟上⼀个空格)这⾥的替换包含了⽂字样式的替换,图⽚的替换-------------------------------------------------------------------------------------------------------------------------------------package com.util.export;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.util.HashMap;import java.util.Iterator;import java.util.List;import java.util.Map;import java.util.Map.Entry;import org.apache.poi.POIXMLDocument;import org.apache.poi.openxml4j.exceptions.InvalidFormatException;import ermodel.ParagraphAlignment;import ermodel.UnderlinePatterns;import ermodel.VerticalAlign;import ermodel.XWPFDocument;import ermodel.XWPFParagraph;import ermodel.XWPFRun;import ermodel.XWPFTable;import ermodel.XWPFTableCell;import ermodel.XWPFTableRow;import org.apache.xmlbeans.XmlException;import org.apache.xmlbeans.XmlToken;import org.openxmlformats.schemas.drawingml.x2006.main.CTNonVisualDrawingProps;import org.openxmlformats.schemas.drawingml.x2006.main.CTPositiveSize2D;import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTInline;public class WordCompileReport {public static void searchAndReplace(String srcPath, String destPath,Map map,Map mapImage) {try {XWPFDocument document = new XWPFDocument(POIXMLDocument.openPackage(srcPath));//替换表格占位符checkTables(document,map);//替换段落占位符checkParagraphs(document,map);//在末尾添加⽂字addParagraphToWord(document,"这⾥添加⽂字",30,0,"#EB9074",true);//替换图⽚replaceTextToImage(document,mapImage,200,200);FileOutputStream outStream = null;outStream = new FileOutputStream(destPath);document.write(outStream);outStream.close();} catch (Exception e) {e.printStackTrace();}}public static void checkTables(XWPFDocument document,Map map) {Iterator it = document.getTablesIterator();while (it.hasNext()) {XWPFTable table = (XWPFTable) it.next();int rcount = 
table.getNumberOfRows();for (int i = 0; i < rcount; i++) {XWPFTableRow row = table.getRow(i);List cells = row.getTableCells();for (XWPFTableCell cell : cells) {List listCell;for (Entry e : map.entrySet()) {listCell = cell.getParagraphs();List cellRun;Map mapAttr = new HashMap();for (int j = 0; j < listCell.size(); j++) {if (listCell.get(j).getText().indexOf(e.getKey()) != -1) {cellRun = listCell.get(j).getRuns();for (int c = 0; c < cellRun.size(); c++) {if (cellRun.get(c).getText(0).equals(e.getKey())) {mapAttr = getWordXWPFRunStyle(cellRun.get(c));listCell.get(j).removeRun(c);XWPFRun newRun = listCell.get(j).insertNewRun(c);setWordXWPFRunStyle(newRun, mapAttr,e.getValue(), false);}}}}}}}}}public static void checkParagraphs(XWPFDocument document,Map map){List listRun;Map mapAttr = new HashMap();List listParagraphs = document.getParagraphs();for (int sa = 0; sa < listParagraphs.size(); sa++) {for (Entry e : map.entrySet()) {if (listParagraphs.get(sa).getText().indexOf(e.getKey()) != -1) {listRun = listParagraphs.get(sa).getRuns();for (int p = 0; p < listRun.size(); p++) {if (listRun.get(p).toString().equals(e.getKey())) {//得到占位符的⽂本格式XWPFRun runOld = listParagraphs.get(sa).getRuns().get(p);mapAttr=getWordXWPFRunStyle(runOld); //封装该占位符⽂本样式到map listParagraphs.get(sa).removeRun(p);//移除占位符//创建设置对应占位符的⽂本XWPFRun runNew = listParagraphs.get(sa).insertNewRun(p);setWordXWPFRunStyle(runNew,mapAttr,e.getValue(),true);}}}}}}public static Map getWordXWPFRunStyle(XWPFRun runOld){Map mapAttr = new HashMap();mapAttr.put("Color", runOld.getColor());if(-1==runOld.getFontSize()){mapAttr.put("FontSize", 12);}else{mapAttr.put("FontSize", runOld.getFontSize());}mapAttr.put("Subscript", runOld.getSubscript());mapAttr.put("Underline", runOld.getUnderline());mapAttr.put("FontFamily",runOld.getFontFamily());return mapAttr;}public static XWPFRun setWordXWPFRunStyle(XWPFRun runNew,Map mapAttr,String text,boolean flag){ runNew.setColor((String) 
mapAttr.get("Color"));if("-1".equals(mapAttr.get("FontSize").toString())){//处理⼩四字号读取为-1的问题runNew.setFontSize(12);}else{runNew.setFontSize((Integer) mapAttr.get("FontSize"));}runNew.setBold(flag);runNew.setUnderline((UnderlinePatterns) mapAttr.get("Underline"));runNew.setText(text);runNew.setSubscript((VerticalAlign) mapAttr.get("Subscript"));runNew.setFontFamily((String) mapAttr.get("FontFamily"));return runNew;}public static void updatePicture(XWPFDocument document,int id, int width, int height) {if(id==0){id = document.getAllPictures().size()-1;}final int EMU = 9525;width *= EMU;height *= EMU;String blipId = document.getAllPictures().get(id).getPackageRelationship().getId();CTInline inline = document.createParagraph().createRun().getCTR().addNewDrawing().addNewInline();String picXml = ""+ ""+ " "+ " "+ " " + "+ id+ "\" name=\"Generated\"/>"+ " "+ " "+ " "+ "+ blipId+ "\" xmlns:r=\"/officeDocument/2006/relationships\"/>"+ " "+ " "+ " "+ " "+ " "+ " "+ " "+ "+ width+ "\" cy=\""+ height+ "\"/>"+ " "+ " "+ " "+ " "+ " "+ " "+ " " + "";// CTGraphicalObjectData graphicData =inline.addNewGraphic().addNewGraphicData();XmlToken xmlToken = null;try {xmlToken = XmlToken.Factory.parse(picXml);} catch (XmlException xe) {xe.printStackTrace();}inline.set(xmlToken);// graphicData.set(xmlToken);inline.setDistT(0);inline.setDistB(0);inline.setDistL(0);inline.setDistR(0);CTPositiveSize2D extent = inline.addNewExtent();extent.setCx(width);extent.setCy(height);CTNonVisualDrawingProps docPr = inline.addNewDocPr();docPr.setId(id);docPr.setName("IMG_" + id);docPr.setDescr("IMG_" + id);}public static void addPictureToWord(XWPFDocument document,String imagePath,int imageType,int width,int height){if(0==imageType){imageType=XWPFDocument.PICTURE_TYPE_JPEG;}try {String ind = document.addPictureData(new FileInputStream(imagePath), imageType);} catch (InvalidFormatException e) {e.printStackTrace();} catch (FileNotFoundException e) 
{e.printStackTrace();}updatePicture(document,document.getAllPictures().size()-1,400,400);}public static void addParagraphToWord(XWPFDocument document,String text,int fontSize,int alignment,String RGBColor,boolean isBold){XWPFParagraph paragraph = document.createParagraph();if(1==alignment){paragraph.setAlignment(ParagraphAlignment.CENTER);}else if(2==alignment){paragraph.setAlignment(ParagraphAlignment.CENTER);}else if(3==alignment){paragraph.setAlignment(ParagraphAlignment.RIGHT);}else{paragraph.setIndentationLeft(alignment);}XWPFRun runOne = paragraph.createRun();runOne.setText(text);runOne.setBold(isBold);runOne.setFontSize(fontSize);if(RGBColor.startsWith("#")){runOne.setColor(RGBColor.substring(1));}else{runOne.setColor(RGBColor);}}public static void addRunToParagraph(XWPFParagraph paragraph,String text,int fontSize,String RGBColor,boolean isBold,boolean isWrap){XWPFRun runText = paragraph.createRun();// runText.setStrike(true); //删除线runText.setBold(isBold);runText.setColor(RGBColor);runText.setFontSize(fontSize);runText.setText(text);if(isWrap)runText.addBreak();}public static void replaceTextToImage(XWPFDocument document,Map mapImage,int width,int height){List listRun;List listParagraphs = document.getParagraphs();for (int sa = 0; sa < listParagraphs.size(); sa++) {for (Entry e : mapImage.entrySet()) {if (listParagraphs.get(sa).getText().indexOf(e.getKey()) != -1) {listRun = listParagraphs.get(sa).getRuns();for (int p = 0; p < listRun.size(); p++) {if (listRun.get(p).toString().equals(e.getKey())) {listParagraphs.get(sa).removeRun(p);//移除占位符//获得当前CTInlineCTInline inline = listParagraphs.get(sa).createRun().getCTR().addNewDrawing().addNewInline();try {insertPicture(document,e.getValue(),inline,width,height);} catch (InvalidFormatException e1) {e1.printStackTrace();} catch (FileNotFoundException e1) {e1.printStackTrace();}}}}}}}public static void insertPicture(XWPFDocument document,String filePath,CTInline inline,int width, int height) throws 
InvalidFormatException, FileNotFoundException{String ind = document.addPictureData(new FileInputStream(filePath), 5);int id = document.getAllPictures().size()-1;final int EMU = 9525;width *= EMU;height *= EMU;String blipId = document.getAllPictures().get(id).getPackageRelationship().getId();String picXml = ""+ ""+ " "+ " "+ " " + "+ id+ "\" name=\"Generated\"/>"+ " "+ " "+ " "+ "+ blipId+ "\" xmlns:r=\"/officeDocument/2006/relationships\"/>"+ " "+ " "+ " "+ " "+ " "+ " "+ " "+ "+ width+ "\" cy=\""+ height+ "\"/>"+ " "+ " "+ " "+ " "+ " "+ " "+ " " + "";inline.addNewGraphic().addNewGraphicData();XmlToken xmlToken = null;try {xmlToken = XmlToken.Factory.parse(picXml);} catch (XmlException xe) {xe.printStackTrace();}inline.set(xmlToken);inline.setDistT(0);inline.setDistB(0);inline.setDistL(0);inline.setDistR(0);CTPositiveSize2D extent = inline.addNewExtent();extent.setCx(width);extent.setCy(height);CTNonVisualDrawingProps docPr = inline.addNewDocPr(); docPr.setId(id);docPr.setName("IMG_" + id);docPr.setDescr("IMG_" + id);}public static void main(String[] args) {HashMap map = new HashMap();HashMap mapImage = new HashMap();map.put("${name}$", "02");map.put("${userIDs}$", "5201314");mapImage.put("${image1}$", "F:\\A.jpg");mapImage.put("${image2}$", "F:\\B.jpg");String srcPath = "c:\\zhenli\\cc.docx";String destPath = "c:\\zhenli\\输出模版.docx";searchAndReplace(srcPath, destPath, map,mapImage);}}。

Java实现word文档在线预览，读取office（word,excel,ppt）文件

Java实现word⽂档在线预览，读取office（word,excel,ppt）⽂件想要实现word或者其他office⽂件的在线预览，⼤部分都是⽤的两种⽅式，⼀种是使⽤openoffice转换之后再通过其他插件预览，还有⼀种⽅式就是通过POI读取内容然后预览。

一、使用openoffice方式实现word预览。主要思路是：1.通过第三方工具openoffice，将word、excel、ppt、txt等文件转换为pdf文件；2.通过swfTools将pdf文件转换成swf格式的文件；3.通过FlexPaper文档组件在页面上进行展示。我使用的工具版本：openoffice：3.4.1；swfTools：1007；FlexPaper：这个关系不大，我随便下的一个。

推荐使用1.5.1。JODConverter：需要jar包，如果是maven管理直接引用就可以。操作步骤：1.office准备：下载openoffice（从过往文件、其他语言中找到中文版3.4.1的版本），下载后解压缩、安装，然后找到安装目录下的program文件夹，在该目录下运行：soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard 如果运行失败，可能会有提示，那就加上 .\ 再运行试一下。这样openoffice的服务就开启了。

2.将flexpaper⽂件中的js⽂件夹(包含了flexpaper_flash_debug.js，flexpaper_flash.js,jquery.js,这三个js⽂件主要是预览swf⽂件的插件)拷贝⾄⽹站根⽬录;将FlexPaperViewer.swf拷贝⾄⽹站根⽬录下(该⽂件主要是⽤在⽹页中播放swf⽂件的播放器)项⽬结构：页⾯代码：fileUpload.jsp<%@ page language="java" contentType="text/html; charset=UTF-8"pageEncoding="UTF-8"%><!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "/TR/html4/loose.dtd"><html><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><title>⽂档在线预览系统</title><style>body {margin-top:100px;background:#fff;font-family: Verdana, Tahoma;}a {color:#CE4614;}#msg-box {color: #CE4614; font-size:0.9em;text-align:center;}#msg-box .logo {border-bottom:5px solid #ECE5D9;margin-bottom:20px;padding-bottom:10px;}#msg-box .title {font-size:1.4em;font-weight:bold;margin:0 0 30px 0;}#msg-box .nav {margin-top:20px;}</style></head><body><div id="msg-box"><form name="form1" method="post" enctype="multipart/form-data" action="docUploadConvertAction.jsp"><div class="title">请上传要处理的⽂件，过程可能需要⼏分钟，请稍候⽚刻。

JAVA读取WORD_pdf等

JAVA读取WORD,EXCEL,POWERPOINT,PDF文件的方式OFFICE文档使用POI控件，PDF可以使用PDFBOX0.7.3控件，完全支持中文，用XPDF也行.java2word 是一个在java程序中调用MS Office Word 文档的组件(类库)。

该组件提供了一组简单的接口，以便java程序调用它的服务来操作Word文档。

这些服务包括：打开文档、新建文档、查找文字、替换文字，插入文字、插入图片、插入表格，在书签处插入文字、插入图片、插入表格等。

填充数据到表格中、读取表格数据，更多激动人心的功能见详细说明。用jacob：其实jacob是一个bridge，是连接java和com或者win32函数的一个中间件，jacob并不能直接抽取word、excel等文件；不过已经有为你写好的了，就是jacob的作者一并提供的。

jacob下载：下载了jacob并放到指定的路径之后(dll放到path,jar文件放到classpath)，就可以写你自己的抽取程序了，下面是一个import java.io.File;import .*;import com.jacob.activeX.*;public class FileExtracter{public static void main(String[] args) {ActiveXComponent app = new ActiveXComponent("Word.Application");String inFile = "c:\\test.doc";String tpFile = "c:\\temp.htm";String otFile = "c:\\temp.xml";boolean flag = false;try {app.setProperty("Visible", new Variant(false));Object docs = app.getProperty("Documents").toDispatch();Object doc = Dispatch.invoke(docs,"Open", Dispatch.Method, new Object[]{inFile,new Variant(false), new int[1]).toDispatch();Dispatch.invoke(doc,"SaveAs", Dispatch.Method, new Object[]{tpFile,new Variant(8)}, new int[1]); Variant f = new Variant(false);Dispatch.call(doc, "Close", f);flag = true;} catch (Exception e) {e.printStackTrace();} finally {app.invoke("Quit", new Variant[] {});}}}2。

[原创]java读写word文档，完美解决方案

[原创]java读写word文档，完美解决方案。做项目的过程中，经常需要把数据库里的数据读出来，经过加工，以word格式输出。

在网上找了很多解决方案都不太理想，偶然发现了PageOffice，一个国产的Office插件，开发调用非常简单！比网上介绍的poi，jacob等解决方案容易上手多了！功能接口虽然没有poi，jacob那么多，但是满足了一般需求的百分之八九十，而且不像poi那样还需要区分处理07格式和03格式那么麻烦。

下⾯是百度百科PageOffice的介绍： https:///item/PageOffice/2737741?fr=aladdinPageOffice的基本功能包括：在web⽹页⾥打开、编辑、打印预览、打印Word、Excel、PowerPoint等Office⽂档。

⽂档并发控制机制。

提供Word修订痕迹、⼿写批注、圈阅划线、键盘批注、电⼦印章等OA公⽂模块的必备功能。

根据数据库动态将数据，包括⽂本、图⽚、表格等填充、导出到Word、Excel模板中指定的位置处，并且可以动态指定内容的各种格式。

提取Word、Excel⽂档中指定位置处的内容，包括⽂本、图⽚（Excel暂不⽀持）、表格等，保存到数据库。

其他功能就不细说了......更多的接⼝说明参考PageOffice官⽹API：/help/java3/index.html下载地址：/dowm/下载PageOffice开发包之后，拷贝 Samples4 ⽂件夹到 Tomcat 的 Webapps ⽬录下，访问：http://localhost:8080/Samples4/index.html 就会看到所有官⽅⽰例效果的列表，重点参考以下⼏个⽰例：⼀、11、给Word⽂档中的数据区域（DataRegion）赋值的简单⽰例⼀、17、给Word⽂档中Table赋值的简单⽰例⼀、18、使⽤数据标签（DataTag）给Word⽂件填充⽂本数据⼆、6、后台编程插⼊Word⽂件到数据区域⼆、7、后台编程插⼊图⽚到数据区域⼆、8、后台编程插⼊Excel⽂件到数据区域⼆、9、给Word⽂档添加⽔印⼆、10、使⽤数据标签（DataTag）给Word⽂件填充带格式的数据⼆、11、在Word中动态创建数据区域我们可以写⼀个简单的程序测试⼀下效果：PageOfficeCtrl poCtrl1 = new PageOfficeCtrl(request);poCtrl1.setServerPage(request.getContextPath()+"/poserver.zz");WordDocument worddoc = new WordDocument();//先在要插⼊word⽂件的位置⼿动插⼊书签,书签必须以“PO_”为前缀//给DataRegion赋值,值的形式为："纯⽂本内容、[word]word⽂件路径[/word]、[image]图⽚路径[/image]"DataRegion data1 = worddoc.openDataRegion("PO_p1");data1.setValue("测试字符串");//纯⽂本内容DataRegion data2 = worddoc.openDataRegion("PO_p2");data2.setValue("[word]doc/2.doc[/word]");//插⼊word⽂件DataRegion data3 = worddoc.openDataRegion("PO_p3");data3.setValue("[image]doc/1.jpg[/image]");//插⼊图⽚//打开⽂件、填充数据poCtrl1.setWriter(worddoc);poCtrl1.webOpen("doc/template.doc", OpenModeType.docNormalEdit, "⽤户名"); 代码很简单。

java读取word文档,提取标题和内容的实例

java读取word⽂档,提取标题和内容的实例使⽤的⼯具为poi，需要导⼊的依赖如下<dependency><groupId>org.apache.poi</groupId><artifactId>poi</artifactId><version>3.17</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml</artifactId><version>3.17</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>poi-scratchpad</artifactId><version>3.17</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>ooxml-schemas</artifactId><version>1.1</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml-schemas</artifactId><version>3.17</version></dependency>我采⽤的分离⽅式是根据字体⼤⼩判断。

1、下载文档前请自行甄别文档内容的完整性，平台不提供额外的编辑、内容补充、找答案等附加服务。
2、"仅部分预览"的文档,不可在线预览部分如存在完整性等问题,可反馈申请退款(可完整预览的文档不适用该条件!)。
3、如文档侵犯您的权益，请联系客服反馈,我们会尽快为您处理(人工客服工作时间：9:00-18:30)。

Java 读取Word文本框中的文本/图片/表格
Word可插入文本框，文本框中可嵌入文本、图片、表格等内容。

对文档中的已有文本框，也可以读取其中的内容。

本文以Java程序代码来展示如何读取文本框，包括读取文本框中的文本、图片以及表格等。

【程序环境】
程序编辑环境为IntelliJ IDEA，并在程序中引入了free Spire.Doc.jar 3.9.0，安装的Jdk版本为1.8.0。

【源文档】
程序中用于测试的Word源文档如下图：
【程序代码】
1.读取文本框中的文本
import com.spire.doc.*;
import com.spire.doc.documents.Paragraph;
import com.spire.doc.fields.TextBox;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
/**
 * Demo program: reads the paragraph text inside the first text box of a Word
 * document and saves it to a plain-text file.
 */
public class ExtractText {
    /**
     * Loads {@code sample.docx}, takes its first text box, and writes the text of
     * every {@link Paragraph} found directly in the text box body to
     * {@code ExtractedText.txt}, replacing any previous content of that file.
     *
     * @param args unused
     * @throws IOException if the output file cannot be opened or written
     */
    public static void main(String[] args) throws IOException {
        // Load the Word document that contains the text box
        Document doc = new Document();
        doc.loadFromFile("sample.docx");

        // Take the first text box
        // NOTE(review): no guard for a document without text boxes — confirm how get(0) behaves then
        TextBox textbox = doc.getTextBoxes().get(0);

        File file = new File("ExtractedText.txt");
        // Overwrite mode (append = false) creates or truncates the file in one step,
        // replacing the earlier delete()/createNewFile() calls whose results were ignored.
        // try-with-resources closes (and flushes) the writers even if a write fails.
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(file, false))) {
            // Walk the direct children of the text box body
            for (Object object : textbox.getBody().getChildObjects()) {
                // Only paragraphs carry text here; other child objects are skipped
                if (object instanceof Paragraph) {
                    // NOTE(review): paragraph texts are written back-to-back with no separator,
                    // exactly as before; add bw.newLine() if paragraph boundaries must be kept.
                    bw.write(((Paragraph) object).getText());
                }
            }
        }
    }
}
2.读取文本框中的图片
import com.spire.doc.*;
import com.spire.doc.documents.Paragraph;
import com.spire.doc.fields.DocPicture;
import com.spire.doc.fields.TextBox;
import javax.imageio.ImageIO;
import java.awt.image.RenderedImage;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Demo program: extracts every picture embedded in the first text box of a Word
 * document and saves each one as a PNG file.
 */
public class ExtractImg {
    /**
     * Loads {@code sample.docx}, collects the image of every {@link DocPicture}
     * found among the child objects of the paragraphs in the first text box, and
     * writes them as {@code 图片-0.png}, {@code 图片-1.png}, ... in collection order.
     *
     * @param args unused
     * @throws IOException if an image cannot be written, including the case where
     *                     {@code ImageIO} finds no suitable PNG writer for it
     */
    public static void main(String[] args) throws IOException {
        // Load the Word document that contains the text box
        Document doc = new Document();
        doc.loadFromFile("sample.docx");

        // Take the first text box
        // NOTE(review): no guard for a document without text boxes — confirm how get(0) behaves then
        TextBox textbox = doc.getTextBoxes().get(0);

        // Typed list instead of a raw List, so the cast happens once when collecting
        List<RenderedImage> images = new ArrayList<>();

        // Visit every paragraph of the text box body
        for (int i = 0; i < textbox.getBody().getParagraphs().getCount(); i++) {
            Paragraph paragraph = textbox.getBody().getParagraphs().get(i);
            // Visit every child object of the paragraph
            for (int j = 0; j < paragraph.getChildObjects().getCount(); j++) {
                Object object = paragraph.getChildObjects().get(j);
                // Keep only pictures
                if (object instanceof DocPicture) {
                    DocPicture picture = (DocPicture) object;
                    images.add((RenderedImage) picture.getImage());
                }
            }
        }

        // Save each collected image as a PNG file
        for (int z = 0; z < images.size(); z++) {
            File file = new File(String.format("图片-%d.png", z));
            // ImageIO.write returns false (without throwing) when no writer accepts the
            // image; surface that instead of silently producing no file.
            if (!ImageIO.write(images.get(z), "PNG", file)) {
                throw new IOException("No PNG writer could encode image " + z + " to " + file);
            }
        }
    }
}
3.读取文本框中的表格
import com.spire.doc.*;
import com.spire.doc.documents.Paragraph;
import com.spire.doc.fields.TextBox;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
public class ExtractTable {
public static void main(String[]args) throws IOException { //加载Word测试文档
Document doc = new Document();
doc.loadFromFile("sample.docx");
//获取文本框
TextBox textbox = doc.getTextBoxes().get(0);
//获取文本框中的表格
Table table = textbox.getBody().getTables().get(0);
//保存到文本文件
File file = new File("ExtractedTable.txt");
if (file.exists())
{
file.delete();
}
file.createNewFile();
FileWriter fw = new FileWriter(file, true);
BufferedWriter bw = new BufferedWriter(fw);
//遍历表格中的段落并提取文本
for (int i = 0; i < table.getRows().getCount(); i++)
{
TableRow row = table.getRows().get(i);
for (int j = 0; j < row.getCells().getCount(); j++)
{
TableCell cell = row.getCells().get(j);
for (int k = 0; k < cell.getParagraphs().getCount(); k++) {
Paragraph paragraph = cell.getParagraphs().get(k);
bw.write(paragraph.getText() + "\t");
}
}
bw.write("\r\n");
}
bw.flush();
bw.close();
fw.close();
}
}。