java读取word文档
Java读取Word文本段落格式属性

Java读取Word文本段落格式属性:本文介绍通过Java后端程序代码来读取Word文本和段落格式的方法。
本次测试环境如下:Word版本:2013编译环境:IntelliJ IDEA2018Work库:free spire.doc.jarJDK版本:1.8.0通过textrange.getCharacterFormat()⽅法读取⽂本字符串格式,通过paragraph.getFormat()读取段落格式,读取具体⽂字及段落属性时,可⽀持读取字体、字号、⽂字颜⾊、⽂字背景、⽂字是否加粗或倾斜、⽂字下划线、⼤⼩写、边框、上标下标、⾏距、段落缩进、对齐⽅式、段落边框、背景等等,下表中罗列了所有可⽀持读取的样式属性,供参考:读取⽂本格式 getCharacterFormat():⽅法类型getFontName()StringgetFontNameAscii()StringgetFontNameBidi()StringgetFontNameFarEast()StringgetFontNameNonFarEast()StringgetBold()booleangetFontSize()floatgetHighlightColor()ColorgetItalic()booleangetTextBackgroundColor()ColorgetTextColor()ColorgetAllCaps()booleangetAllowContextualAlternates()booleangetBidi()booleangetBoldBidi()booleangetBorder()BordergetCharacterSpacing()floatgetDoubleStrike()booleangetEmboss()booleangetEmphasisMark()EmphasisgetEngrave()booleangetFontSizeBidi()floatgetFontTypeHint()FontTypeHintgetHidden()booleangetItalicBidi()booleangetLigaturesType()LigatureTypegetLocaleIdASCII()shortgetLocaleIdFarEast()shortgetNumberFormType()NumberFormTypegetNumberSpaceType()NumberSpaceTypegetPosition()floatgetStylisticSetType()StylisticSetTypegetSubSuperScript()SubSuperScriptgetTextScale()shortgetUnderlineStyle()UnderlineStyle读取段落格式:getFormat()⽅法类型getLineSpacing()floatgetFirstLineIndent()floatgetLeftIndent()floatgetAfterSpacing()floatgetBeforeSpacing()floatgetRightIndent()floatgetRightIndent()float getTextAlignment()TextAlignmnet getAfterAutoSpacing()boolean getAutoSpaceDE()boolean getAutoSpaceDN()boolean getBackColor()Color getBeforeAutoSpacing()boolean getBoders()Borders getHorizontalAlignment()HorizontalAlignmnet getKeepFollow()boolean getKeepLines()boolean getLineSpacingRule()LineSpacingRule getMirrorIndents()boolean getOutlineLevel()OutlineLevel getOverflowPunc()boolean getPageBreakAfter()getPageBreakBefore()getSuppressAutoHyphens()getTabs()⽤于测试的Word⽂档:Java⽰例代码import com.spire.doc.*;import com.spire.doc.documents.Paragraph;import com.spire.doc.documents.TextSelection;import com.spire.doc.fields.TextRange;import java.awt.*;public class GetTextFormat {public static void 
main(String[] args) {//加载Word源⽂档Document doc = new Document();doc.loadFromFile("test.docx");//获取段落数量int count = doc.getSections().get(0).getParagraphs().getCount();System.out.println("总共含有段落数:" + count);//查找指定⽂本TextSelection textSelections = doc.findString("东野圭吾", false, true);//获取字体名称String fontname = textSelections.getAsOneRange().getCharacterFormat().getFontName();//获取字体⼤⼩float fontsize = textSelections.getAsOneRange().getCharacterFormat().getFontSize();System.out.println("字体名称:" + fontname +"\n"+"字体⼤⼩:"+fontsize);//获取第⼆段Paragraph paragraph2 = doc.getSections().get(0).getParagraphs().get(1);//获取段落⾏距float linespage = paragraph2.getFormat().getLineSpacing();System.out.println("段落⾏距:" + linespage);//遍历段落中的⼦对象for (int z = 0; z < paragraph2.getChildObjects().getCount(); z++){Object obj2 = paragraph2.getChildObjects().get(z);//判定是否为⽂本if (obj2 instanceof TextRange){TextRange textRange2 = (TextRange) obj2;//获取⽂本颜⾊Color textcolor = textRange2.getCharacterFormat().getTextColor();if (!(textcolor.getRGB() == 0)){System.out.println("⽂本颜⾊:" + textRange2.getText() + textcolor.toString());}//获取字体加粗效果boolean isbold = textRange2.getCharacterFormat().getBold();if (isbold == true){System.out.println("加粗⽂本:" + textRange2.getText());}//获取字体倾斜效果boolean isitalic = textRange2.getCharacterFormat().getItalic();if (isitalic == true){System.out.println("倾斜⽂本:" + textRange2.getText());}//获取⽂本背景String text = textRange2.getText();Color highlightcolor = textRange2.getCharacterFormat().getHighlightColor();//获取⽂本的⾼亮颜⾊(即突出显⽰颜⾊)if (!(highlightcolor.getRGB() == 0 )){System.out.println("⽂本⾼亮:" + text + highlightcolor.toString());//输出⾼亮的⽂本和颜⾊}Color textbackgroundcolor = textRange2.getCharacterFormat().getTextBackgroundColor();//获取⽂字背景(底纹)if (!(textbackgroundcolor.getRGB()==0)){System.out.println("⽂本背景:" + text + textbackgroundcolor.toString());//输出有背景的⽂本和颜⾊}}}}}运⾏程序,输⼊获取结果:。
用Java读取Word文档

用Java读取Word文档由于Word的编码方式比较复杂,所以Word文档不可能通过流的方式直接读取;当然如果Word可以转化成TXT文件就可以直接读取了;目前读取Word比较好的开源工具是Poi及Jacob,感觉Poi读取功能要比Jacob略逊一筹,毕竟Jacob可以直接调用Word的COM组件;但是微软产品不开放源码,所以Jacob读取Word文档也只能是摸着石头过河,一点一点破解了。
Jacob读取Word内容,由于Word内容的复杂性,读取也是非常不方便的,目前可以有"按段落读取","按书签读取"及"按照表格读取"等几种形式。
示例讲解(通过Java FileReader,Jacob两种方式读取Word内容)一.通过java流读取Word内容复制代码1.import java.io.BufferedReader;2.import java.io.FileReader;3.import java.io.IOException;4.5.public class ReadWordByStream {6.public static void main(String[] args) throws IOException {7. String rowContent = new String();8. String content = new String();9. BufferedReader in = new BufferedReader(new FileReader("d:\\test3.doc"));10. while ((rowContent = in.readLine()) != null) {11.content = content + rowContent + "\n";12. }13. System.out.println(content.getBytes());14. System.out.println(new String(content.getBytes(),"utf-8"));//因为编码方式不同,不容易解析15. in.close();16.}17.18.}二.通过Jacob读取Word内容复制代码1.import com.jacob.activeX.ActiveXComponent;2.import Thread;3.import .Dispatch;4.import .Variant;5.6.public class WordReader {7.public static void main(String args[]) {8. ComThread.InitSTA();// 初始化com的线程9. ActiveXComponent wordApp = new ActiveXComponent("Word.Application"); // 启动word10. // Set the visible property as required.11. Dispatch.put(wordApp, "Visible", new Variant(true));// //设置word可见12. Dispatch docs = wordApp.getProperty("Documents").toDispatch();//所有文档窗口13.// String inFile = "d:\\test.doc";14.// Dispatch doc = Dispatch.invoke(docs,"Open",Dispatch.Method,15.// new Object[] { inFile, new Variant(false),new Variant(false) },//参数3,false:可写,true:只读16.// new int[1]).toDispatch();//打开文档17.18. Dispatch doc = Dispatch.call(docs, "Add").toDispatch(); //创建一个新文档19. Dispatch wordContent = Dispatch.get(doc, "Content").toDispatch(); //取得word文件的内容20. Dispatch font = Dispatch.get(wordContent, "Font").toDispatch();21. Dispatch.put(font, "Bold", new Variant(true)); // 设置为粗体22.Dispatch.put(font, "Italic", new Variant(true)); // 设置为斜体23.Dispatch.put(font, "Underline", new Variant(true));24.Dispatch.put(font, "Name", new Variant("宋体"));25.Dispatch.put(font, "Size", new Variant(14));26. for(int i=0;i<10;i++){//作为一个段落27.Dispatch.call(wordContent, "InsertAfter", "current paragraph"+i+" ");28. }29. for(int j=0;j<10;j++){//作为十个段落30. 
Dispatch.call(wordContent, "InsertAfter", "current paragraph"+j+"\r");31.}32. Dispatch paragraphs = Dispatch.get(wordContent, "Paragraphs")33. .toDispatch(); //所有段落34. int paragraphCount = Dispatch.get(paragraphs, "Count").getInt();35. System.out.println("paragraphCount:"+paragraphCount);36.37. for (int i = 1; i <= paragraphCount; i++) {38.Dispatch paragraph = Dispatch.call(paragraphs, "Item",39.new Variant(i)).toDispatch();40.Dispatch paragraphRange = Dispatch.get(paragraph, "Range")41..toDispatch();42.String paragraphContent = Dispatch.get(paragraphRange, "Text")43..toString();44.System.out.println(paragraphContent);45.//Dispatch.call(selection, "MoveDown");46. }47. // WordReader.class.getClass().getResource("/").getPath().substring+"test.doc";48. Dispatch.call(doc, "SaveAs","d:\\wordreader.doc");49. // Close the document without saving changes50. // 0 = wdDoNotSaveChanges51. // -1 = wdSaveChanges52. // -2 = wdPromptToSaveChanges53. ComThread.Release();//释放com线程54. Dispatch.call(docs, "Close", new Variant(0));55. docs = null;56. Dispatch.call(wordApp,"Quit");57. wordApp = null;58.}59.}用Java简单的读取word文档中的数据:第一步:下载tm-extractors-0.4.jar下载地址:/browser/elated-core/trunk/lib/tm-extractors-0.4.jar?rev =46并把它放到你的classpath路径下面。
Java 读取Word中的文本和图片

Java 提取Word中的文本和图片:本文将介绍通过Java来提取或读取Word文档中文本和图片的方法。
这里提取文本和图片包括同时提取文档正文当中以及页眉、页脚中的文本和图片。
使用工具:Spire.Doc for JavaJar文件导入方法(参考):方法1:下载jar文件包。
下载后解压文件,并将lib文件夹下的Spire.Doc.jar文件导入到java程序。
导入效果参考如下:方法2:可通过maven导入。
参考导入方法。
测试文档如下:Java 代码示例(供参考)【示例1】提取Word 中的文本 import com.spire.doc.*; import java.io.FileWriter;import java.io.IOException;public class ExtractText {public static void main(String[] args) throws IOException{//加载测试文档Document doc = new Document();doc.loadFromFile("test.docx");//获取文本保存为StringString text = doc.getText();//将String写入TxtwriteStringToTxt(text,"提取文本.txt");}public static void writeStringToTxt(String content, String txtFileName) throws IOException {FileWriter fWriter= new FileWriter(txtFileName,true);try {fWriter.write(content);}catch(IOException ex){ex.printStackTrace();}finally{try{fWriter.flush();fWriter.close();} catch (IOException ex) {ex.printStackTrace();}}}}文本提取结果:【示例2】提取Word中的图片import com.spire.doc.Document;import com.spire.doc.documents.DocumentObjectType;import com.spire.doc.fields.DocPicture;import com.spire.doc.interfaces.ICompositeObject;import com.spire.doc.interfaces.IDocumentObject;import javax.imageio.ImageIO;import java.awt.image.RenderedImage;import java.io.File;import java.io.IOException;import java.util.ArrayList;import java.util.LinkedList;import java.util.List;import java.util.Queue;public class ExtractImg {public static void main(String[] args) throws IOException { //加载Word文档Document document = new Document();document.loadFromFile("test.docx");//创建Queue对象Queue nodes = new LinkedList();nodes.add(document);//创建List对象List images = new ArrayList();//遍历文档中的子对象while (nodes.size() > 0) {ICompositeObject node = (ICompositeObject) nodes.poll();for (int i = 0; i < node.getChildObjects().getCount(); i++) {IDocumentObject child = node.getChildObjects().get(i);if (child instanceof ICompositeObject) {nodes.add((ICompositeObject) child);//获取图片并添加到Listif (child.getDocumentObjectType() == DocumentObjectType.Picture) { DocPicture picture = (DocPicture) child;images.add(picture.getImage());}}}}//将图片保存为PNG格式文件for (int i = 0; i < images.size(); i++) {File file = new File(String.format("图片-%d.png", i));ImageIO.write((RenderedImage) images.get(i), "PNG", 
file);}}}图片提取结果:(本文完)。
Java读取Word模板替换内容并另存

Java读取Word模板替换内容并另存。用到的工具:jacob,文件解压后主要有三个文件:jacob.jar、jacob-1.17-M2-x64.dll和jacob-1.17-M2-x86.dll。
jacob.jar引入到项目工程中,jacob-1.17-M2-x64.dll放在C:\Windows\System32下,如果系统是32位的则把jacob-1.17-M2-x86.dll放在C:\Windows\System32下。
注意:文件名要用.doc,千万不能用.docx。
那样会打不开⽂件代码⽰例:/** Java2word.java** Created on 2007年8⽉13⽇, 上午10:32** To change this template, choose Tools | Template Manager* and open the template in the editor.*//** 传⼊数据为HashMap对象,对象中的Key代表word模板中要替换的字段,Value代表⽤来替换的值。
* word模板中所有要替换的字段(即HashMap中的Key)以特殊字符开头和结尾,如:$code$、$date$……,以免执⾏错误的替换。
* 所有要替换为图⽚的字段,Key中需包含image或者Value为图⽚的全路径(⽬前只判断⽂件后缀名为:.bmp、.jpg、.gif)。
* 要替换表格中的数据时,HashMap中的Key格式为“table$R@N”,其中:R代表从表格的第R⾏开始替换,N代表word模板中的第N张表格;Value为ArrayList对象,ArrayList中包含的对象统⼀为String[],⼀条String[]代表⼀⾏数据,ArrayList中第⼀条记录为特殊记录,记录的是表格中要替换的列号,如:要替换第⼀列、第三列、第五列的数据,则第⼀条记录为String[3] {“1”,”3”,”5”}。
JAVA-实现-利用POI读取word文档实例

JAVA-实现-利⽤POI读取word⽂档实例package read.document;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.IOException;import java.io.InputStream;import java.sql.Connection;import java.util.ArrayList;import java.util.List;import org.apache.poi.hwpf.HWPFDocument;import ermodel.CharacterRun;import ermodel.Range;import pers.mysql.DBUtil;import pers.mysql.MysqlDao;import pers.mysql.MysqlDaoImp;public class WordReading {public static void main(String[] args) {String filePath = "*****.doc";readOnWord(filePath);}public static void readOnWord(String filePath) {if (filePath.endsWith(".doc")) {// 输⼊流-基类InputStream is = null;try {is = new FileInputStream(filePath);} catch (FileNotFoundException e) {e.printStackTrace();System.out.println("⽂件打开失败。
");}// 加载doc⽂档try {HWPFDocument doc = new HWPFDocument(is);Range text = doc.getRange();// 整个⽂档/** 分解word:⽂本 ->⼩节 ->段落 ->characterRun(理解为⼩单元)* section -⼩节; paragraph - 段落*///1分出内容节点Range hotWord = text.getSection(2);// 0-封⾯,1-⽬录,2-⽂本;第3⼩节//2段落处理/** 维护两个变量** 热词和解释区别:⼤⼩-word:26,explaining:18**/String word = "";String explaining = "";int wordOK = 0;int explainOK = 0;// 判断当前word&explain是否可以填⼊数据库int count = 24;// 读取⼏条数据到数据库int begin = 2;// 段落读取位置for (int i = 0; i < count;) {Range para = hotWord.getParagraph(begin);CharacterRun field = para.getCharacterRun(0);int fontSize = field.getFontSize();if (fontSize == 26) {word = para.text();wordOK = 1;begin++;} else {while (fontSize < 26) {explaining += para.text();begin++;para = hotWord.getParagraph(begin); field = para.getCharacterRun(0);fontSize = field.getFontSize();}explainOK = 1;}// 判断word&explain是否可以填⼊数据库if (wordOK == 1 && explainOK == 1) {MysqlDaoImp.addData(word, explaining); i++;//填⼊数据库后,⼀切归"0"wordOK = 0;explainOK = 0;word="";explaining="";}}// 输出测试// System.out.println("读取:" + "head:");} catch (IOException e) {e.printStackTrace();System.out.println("IO错误。
java中读取word文档里的内容

Iterator<XWPFTable> itTable = document.getTablesIterator(); int ind = 0; while (itTable.hasNext()){
ind++; XWPFTable table = (XWPFTable) itTable.next(); //行 int rcount = table.getNumberOfRows(); for (int i = 0; i < rcount; i++){
// OPCPt.openPackage(srcPath); // XWPFDocument doc = new XWPFDocument(pack); return xdoc; } catch (IOException e) { System.out.println("读取文件出错!"); e.printStackTrace(); return null; } } return null; }
}
//pom.xml文件
<project xmlns="/POM/4.0.0" xmlns:xsi="/2001/XMLSchema-instance" xsi:schemaLocation="/POM/4.0.0 /xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId></groupId> <artifactId>excelReadAndWrite</artifactId> <version>0.0.1-SNAPSHOT</version> <dependencies> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>3.8.1</version> <scope>test</scope> </dependency> <dependency> <groupId>org.apache.directory.studio</groupId> <artifactId>mons.codec</artifactId> <version>1.8</version> </dependency> <dependency> <groupId>net.sourceforge.jexcelapi</groupId> <artifactId>jxl</artifactId> <version>2.6.12</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.9</version> </dependency> </dependencies>
java读取word并解析

java读取word并解析java POI3.8处理word模板,⽂字图⽚表格将word模板⾥⾯的特殊标签换成⽂字,图⽚,以下是处理的代码特殊标签最好的复制粘贴到word模板⾥⾯ ,因为⼿动敲⼊可能有点⼩的差别都导致这个标签不是⼀⼩块(chunk)这样会⽆法识别,⽂字样式设置的时候也最好选择特殊标签整体进⾏设置,尽量不要多选(例如标签后⾯跟上⼀个空格)这⾥的替换包含了⽂字样式的替换,图⽚的替换-------------------------------------------------------------------------------------------------------------------------------------package com.util.export;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.util.HashMap;import java.util.Iterator;import java.util.List;import java.util.Map;import java.util.Map.Entry;import org.apache.poi.POIXMLDocument;import org.apache.poi.openxml4j.exceptions.InvalidFormatException;import ermodel.ParagraphAlignment;import ermodel.UnderlinePatterns;import ermodel.VerticalAlign;import ermodel.XWPFDocument;import ermodel.XWPFParagraph;import ermodel.XWPFRun;import ermodel.XWPFTable;import ermodel.XWPFTableCell;import ermodel.XWPFTableRow;import org.apache.xmlbeans.XmlException;import org.apache.xmlbeans.XmlToken;import org.openxmlformats.schemas.drawingml.x2006.main.CTNonVisualDrawingProps;import org.openxmlformats.schemas.drawingml.x2006.main.CTPositiveSize2D;import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTInline;public class WordCompileReport {public static void searchAndReplace(String srcPath, String destPath,Map map,Map mapImage) {try {XWPFDocument document = new XWPFDocument(POIXMLDocument.openPackage(srcPath));//替换表格占位符checkTables(document,map);//替换段落占位符checkParagraphs(document,map);//在末尾添加⽂字addParagraphToWord(document,"这⾥添加⽂字",30,0,"#EB9074",true);//替换图⽚replaceTextToImage(document,mapImage,200,200);FileOutputStream outStream = null;outStream = new FileOutputStream(destPath);document.write(outStream);outStream.close();} catch (Exception e) {e.printStackTrace();}}public static void checkTables(XWPFDocument document,Map map) {Iterator it = document.getTablesIterator();while (it.hasNext()) {XWPFTable table = (XWPFTable) it.next();int rcount = 
table.getNumberOfRows();for (int i = 0; i < rcount; i++) {XWPFTableRow row = table.getRow(i);List cells = row.getTableCells();for (XWPFTableCell cell : cells) {List listCell;for (Entry e : map.entrySet()) {listCell = cell.getParagraphs();List cellRun;Map mapAttr = new HashMap();for (int j = 0; j < listCell.size(); j++) {if (listCell.get(j).getText().indexOf(e.getKey()) != -1) {cellRun = listCell.get(j).getRuns();for (int c = 0; c < cellRun.size(); c++) {if (cellRun.get(c).getText(0).equals(e.getKey())) {mapAttr = getWordXWPFRunStyle(cellRun.get(c));listCell.get(j).removeRun(c);XWPFRun newRun = listCell.get(j).insertNewRun(c);setWordXWPFRunStyle(newRun, mapAttr,e.getValue(), false);}}}}}}}}}public static void checkParagraphs(XWPFDocument document,Map map){List listRun;Map mapAttr = new HashMap();List listParagraphs = document.getParagraphs();for (int sa = 0; sa < listParagraphs.size(); sa++) {for (Entry e : map.entrySet()) {if (listParagraphs.get(sa).getText().indexOf(e.getKey()) != -1) {listRun = listParagraphs.get(sa).getRuns();for (int p = 0; p < listRun.size(); p++) {if (listRun.get(p).toString().equals(e.getKey())) {//得到占位符的⽂本格式XWPFRun runOld = listParagraphs.get(sa).getRuns().get(p);mapAttr=getWordXWPFRunStyle(runOld); //封装该占位符⽂本样式到map listParagraphs.get(sa).removeRun(p);//移除占位符//创建设置对应占位符的⽂本XWPFRun runNew = listParagraphs.get(sa).insertNewRun(p);setWordXWPFRunStyle(runNew,mapAttr,e.getValue(),true);}}}}}}public static Map getWordXWPFRunStyle(XWPFRun runOld){Map mapAttr = new HashMap();mapAttr.put("Color", runOld.getColor());if(-1==runOld.getFontSize()){mapAttr.put("FontSize", 12);}else{mapAttr.put("FontSize", runOld.getFontSize());}mapAttr.put("Subscript", runOld.getSubscript());mapAttr.put("Underline", runOld.getUnderline());mapAttr.put("FontFamily",runOld.getFontFamily());return mapAttr;}public static XWPFRun setWordXWPFRunStyle(XWPFRun runNew,Map mapAttr,String text,boolean flag){ runNew.setColor((String) 
mapAttr.get("Color"));if("-1".equals(mapAttr.get("FontSize").toString())){//处理⼩四字号读取为-1的问题runNew.setFontSize(12);}else{runNew.setFontSize((Integer) mapAttr.get("FontSize"));}runNew.setBold(flag);runNew.setUnderline((UnderlinePatterns) mapAttr.get("Underline"));runNew.setText(text);runNew.setSubscript((VerticalAlign) mapAttr.get("Subscript"));runNew.setFontFamily((String) mapAttr.get("FontFamily"));return runNew;}public static void updatePicture(XWPFDocument document,int id, int width, int height) {if(id==0){id = document.getAllPictures().size()-1;}final int EMU = 9525;width *= EMU;height *= EMU;String blipId = document.getAllPictures().get(id).getPackageRelationship().getId();CTInline inline = document.createParagraph().createRun().getCTR().addNewDrawing().addNewInline();String picXml = ""+ ""+ " "+ " "+ " " + "+ id+ "\" name=\"Generated\"/>"+ " "+ " "+ " "+ "+ blipId+ "\" xmlns:r=\"/officeDocument/2006/relationships\"/>"+ " "+ " "+ " "+ " "+ " "+ " "+ " "+ "+ width+ "\" cy=\""+ height+ "\"/>"+ " "+ " "+ " "+ " "+ " "+ " "+ " " + "";// CTGraphicalObjectData graphicData =inline.addNewGraphic().addNewGraphicData();XmlToken xmlToken = null;try {xmlToken = XmlToken.Factory.parse(picXml);} catch (XmlException xe) {xe.printStackTrace();}inline.set(xmlToken);// graphicData.set(xmlToken);inline.setDistT(0);inline.setDistB(0);inline.setDistL(0);inline.setDistR(0);CTPositiveSize2D extent = inline.addNewExtent();extent.setCx(width);extent.setCy(height);CTNonVisualDrawingProps docPr = inline.addNewDocPr();docPr.setId(id);docPr.setName("IMG_" + id);docPr.setDescr("IMG_" + id);}public static void addPictureToWord(XWPFDocument document,String imagePath,int imageType,int width,int height){if(0==imageType){imageType=XWPFDocument.PICTURE_TYPE_JPEG;}try {String ind = document.addPictureData(new FileInputStream(imagePath), imageType);} catch (InvalidFormatException e) {e.printStackTrace();} catch (FileNotFoundException e) 
{e.printStackTrace();}updatePicture(document,document.getAllPictures().size()-1,400,400);}public static void addParagraphToWord(XWPFDocument document,String text,int fontSize,int alignment,String RGBColor,boolean isBold){XWPFParagraph paragraph = document.createParagraph();if(1==alignment){paragraph.setAlignment(ParagraphAlignment.CENTER);}else if(2==alignment){paragraph.setAlignment(ParagraphAlignment.CENTER);}else if(3==alignment){paragraph.setAlignment(ParagraphAlignment.RIGHT);}else{paragraph.setIndentationLeft(alignment);}XWPFRun runOne = paragraph.createRun();runOne.setText(text);runOne.setBold(isBold);runOne.setFontSize(fontSize);if(RGBColor.startsWith("#")){runOne.setColor(RGBColor.substring(1));}else{runOne.setColor(RGBColor);}}public static void addRunToParagraph(XWPFParagraph paragraph,String text,int fontSize,String RGBColor,boolean isBold,boolean isWrap){XWPFRun runText = paragraph.createRun();// runText.setStrike(true); //删除线runText.setBold(isBold);runText.setColor(RGBColor);runText.setFontSize(fontSize);runText.setText(text);if(isWrap)runText.addBreak();}public static void replaceTextToImage(XWPFDocument document,Map mapImage,int width,int height){List listRun;List listParagraphs = document.getParagraphs();for (int sa = 0; sa < listParagraphs.size(); sa++) {for (Entry e : mapImage.entrySet()) {if (listParagraphs.get(sa).getText().indexOf(e.getKey()) != -1) {listRun = listParagraphs.get(sa).getRuns();for (int p = 0; p < listRun.size(); p++) {if (listRun.get(p).toString().equals(e.getKey())) {listParagraphs.get(sa).removeRun(p);//移除占位符//获得当前CTInlineCTInline inline = listParagraphs.get(sa).createRun().getCTR().addNewDrawing().addNewInline();try {insertPicture(document,e.getValue(),inline,width,height);} catch (InvalidFormatException e1) {e1.printStackTrace();} catch (FileNotFoundException e1) {e1.printStackTrace();}}}}}}}public static void insertPicture(XWPFDocument document,String filePath,CTInline inline,int width, int height) throws 
InvalidFormatException, FileNotFoundException{String ind = document.addPictureData(new FileInputStream(filePath), 5);int id = document.getAllPictures().size()-1;final int EMU = 9525;width *= EMU;height *= EMU;String blipId = document.getAllPictures().get(id).getPackageRelationship().getId();String picXml = ""+ ""+ " "+ " "+ " " + "+ id+ "\" name=\"Generated\"/>"+ " "+ " "+ " "+ "+ blipId+ "\" xmlns:r=\"/officeDocument/2006/relationships\"/>"+ " "+ " "+ " "+ " "+ " "+ " "+ " "+ "+ width+ "\" cy=\""+ height+ "\"/>"+ " "+ " "+ " "+ " "+ " "+ " "+ " " + "";inline.addNewGraphic().addNewGraphicData();XmlToken xmlToken = null;try {xmlToken = XmlToken.Factory.parse(picXml);} catch (XmlException xe) {xe.printStackTrace();}inline.set(xmlToken);inline.setDistT(0);inline.setDistB(0);inline.setDistL(0);inline.setDistR(0);CTPositiveSize2D extent = inline.addNewExtent();extent.setCx(width);extent.setCy(height);CTNonVisualDrawingProps docPr = inline.addNewDocPr(); docPr.setId(id);docPr.setName("IMG_" + id);docPr.setDescr("IMG_" + id);}public static void main(String[] args) {HashMap map = new HashMap();HashMap mapImage = new HashMap();map.put("${name}$", "02");map.put("${userIDs}$", "5201314");mapImage.put("${image1}$", "F:\\A.jpg");mapImage.put("${image2}$", "F:\\B.jpg");String srcPath = "c:\\zhenli\\cc.docx";String destPath = "c:\\zhenli\\输出模版.docx";searchAndReplace(srcPath, destPath, map,mapImage);}}。
Java 读取Word中的脚注、尾注

Java 读取Word中的脚注、尾注本文介绍读取Word中的脚注及尾注的方法,添加脚注、尾注可以参考这篇文章。
注:本文使用了Word类库(Free Spire.Doc for Java 免费版)来读取,获取该类库可通过官网下载,并解压文件,将lib文件夹下的Spire.Doc.jar导入java程序;或者通过maven仓库安装导入。
测试文档如下,包含脚注及尾注:1. 读取Word脚注import com.spire.doc.*;import com.spire.doc.documents.Paragraph;import com.spire.doc.fields.Footnote;import com.spire.doc.fields.TextRange;import java.util.List;public class ExtractFootnoteAndEndnote {public static void main(String[] args) {//创建Document实例Document doc = new Document();doc.loadFromFile("test1.docx");//获取文档中的所有脚注List<Footnote> footNotes = doc.getFootnotes();//实例化String类型变量String str = "";//遍历脚注for (Footnote footNote :footNotes) {//遍历脚注中的段落for (int j = 0; j < footNote.getTextBody().getParagraphs().getCount(); j++) { Paragraph paragraph = footNote.getTextBody().getParagraphs().get(j);//遍历段落中的对象for(Object object : paragraph.getChildObjects()){//读取文本if (object instanceof TextRange) {TextRange textRange = (TextRange) object;str = str + textRange.getText();}}}}//输出脚注文本System.out.println(str);}}脚注读取结果:2. 读取Word尾注import com.spire.doc.*;import com.spire.doc.documents.Paragraph;import com.spire.doc.fields.Footnote;import com.spire.doc.fields.TextRange;import java.util.List;public class ExtractFootnoteAndEndnote {public static void main(String[] args) {//创建Document实例Document doc = new Document();doc.loadFromFile("test1.docx");//获取所有尾注List<Footnote> endNotes = doc.getEndnotes();//实例化String类型变量String str = "";//遍历尾注for (Footnote endnote :endNotes) {//遍历尾注中的段落for (int j = 0; j < endnote.getTextBody().getParagraphs().getCount(); j++) { Paragraph paragraph = endnote.getTextBody().getParagraphs().get(j);//遍历段落中的对象for(Object object : paragraph.getChildObjects()){//读取文本if (object instanceof TextRange) {TextRange textRange = (TextRange) object;str = str + textRange.getText();}}}}//输出尾注文本System.out.println(str);}}尾注读取结果:(本文完)。
- 1、下载文档前请自行甄别文档内容的完整性,平台不提供额外的编辑、内容补充、找答案等附加服务。
- 2、"仅部分预览"的文档,不可在线预览部分如存在完整性等问题,可反馈申请退款(可完整预览的文档不适用该条件!)。
- 3、如文档侵犯您的权益,请联系客服反馈,我们会尽快为您处理(人工客服工作时间:9:00-18:30)。
java读取word文档时,虽然网上介绍了很多插件,如poi、java2Word、jacob、itext等等,但各有问题:poi无法读取格式(新的API估计可行,但好像还处于研发阶段,不太稳定,做项目不太敢用);java2Word、jacob容易报错找不到注册,比较诡异,我曾经在不同的机器上试过,操作方法完全一致,有的机器不报错,有的报错,去他们论坛找高人解决也说不出原因,项目部署用它有点玄;itext好像写很方便,但是我查了好久资料没有见到过关于读的好办法。
经过一番选择还是折中点采用rtf最好,毕竟rtf是开源格式,不需要借助任何插件,只需基本IO操作外加编码转换即可。
rtf格式文件表面看来和doc没啥区别,都可以用word打开,各种格式都可以设定。
----- 实现的功能:读取rtf模板内容(格式和文本内容),替换变化部分,形成新的rtf文档。
----- 实现思路:模板中固定部分手动输入,变化的部分用$info$表示,只需替换$info$即可。
1、采用字节的形式读取rtf模板内容2、将可变的内容字符串转为rtf编码3、替换原文中的可变部分,形成新的rtf文档主要程序如下:/*** 将制定的字符串转换为rtf编码*/public String bin2hex(String bin) {char[] digital = "0123456789ABCDEF".toCharArray();StringBuffer sb = new StringBuffer("");byte[] bs = bin.getBytes();int bit;for (int i = 0; i < bs.length;i++) {bit = (bs[i] & 0x0f0) >> 4;sb.append("\\'");sb.append(digital[bit]);bit = bs[i] & 0x0f;sb.append(digital[bit]);}return sb.toString();}/*** 字节形式读取rtf模板内容*/public String readByteRtf(InputStream ins, String path){String sourcecontent = "";try{ins = new FileInputStream(path);byte[] b = new byte[1024];if (ins == null) {System.out.println("源模板文件不存在");}int bytesRead = 0;while (true) {bytesRead = ins.read(b, 0, 1024); // return final read bytes countsif(bytesRead == -1) {// end of InputStreamSystem.out.println("读取模板文件结束");break;}sourcecontent += new String(b, 0, bytesRead); // convert to string using bytes }}catch(Exception e){e.printStackTrace();}}以上为核心代码,剩余部分就是替换,从新组装java中的String.replace(oldstr,newstr);方法可以实现,在这就不贴了。
源代码部分详见附件。
运行源代码前提:c盘创建YQ目录,将附件中"模板.rtf"复制到YQ目录之下,运行OperatorRTF.java文件即可,就会在YQ目录下生成文件名如:21时15分19秒_cheney_记录.rtf 的文件。
文件名是在程序中指定的呵呵。
由于是由商业软件中拆分出的demo,所以只是将自己原来写的程序分离,合并在一个java文件中,因此有的方法在示例程序中看似多余,没有必要那么麻烦。
对于替换部分需要循环的特例程序,我不好拆分,里面很容易暴露商业软件的东西,所以就不贴了,有需要的话可以加我QQ或者MSN,一起讨论呵呵。
附件传了半天也没有传上去,没有办法只有这样搞了呵呵。
模板文件附件无法存放,需要的直接联系呵呵。
其实直接看以下的java程序部分,就会明白/*** 以下部分是java源代码*/package com;import java.io.File;import java.io.FileInputStream;import java.io.FileWriter;import java.io.IOException;import java.io.InputStream;import java.io.PrintWriter;import java.text.SimpleDateFormat;import java.util.Date;public class OperatorRTF {/*** 字符串转换为rtf编码* @param content* @return*/public String strToRtf(String content){char[] digital = "0123456789ABCDEF".toCharArray();StringBuffer sb = new StringBuffer("");byte[] bs = content.getBytes();int bit;for (int i = 0; i < bs.length; i++) {bit = (bs[i] & 0x0f0) >> 4;sb.append("\\'");sb.append(digital[bit]);bit = bs[i] & 0x0f;sb.append(digital[bit]);}return sb.toString();}/*** 替换文档的可变部分* @param content* @param replacecontent* @param flag* @return*/public String replaceRTF(String content,String replacecontent,int flag){ String rc = strToRtf(replacecontent);String target = "";/*if(flag==0){target = content.replace("$time$",rc);}*/if(flag==0){target = content.replace("$timetop$",rc);}if(flag==1){target = content.replace("$info$",rc);}if(flag==2){target = content.replace("$idea$",rc);if(flag==3){target = content.replace("$advice$",rc);}if(flag==4){target = content.replace("$infosend$",rc);}return target;}/*** 获取文件路径* @param flag* @return*/public String getSavePath() {String path = "C:\\YQ";File fDirecotry = new File(path);if (!fDirecotry.exists()) {fDirecotry.mkdirs();return path;}/*** 半角转为全角*/public String ToSBC(String input){char[] c = input.toCharArray();for (int i = 0; i < c.length; i++){if (c[i] == 32){c[i] = (char) 12288;continue;}if (c[i] < 127){c[i] = (char) (c[i] + 65248);}}return new String(c);}public void rgModel(String username, String content) {// TODO Auto-generated method stub/* 构建生成文件名targetname:12时10分23秒_username_记录.rtf */Date current=new Date();SimpleDateFormat sdf=new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); String targetname = sdf.format(current).substring(11,13) + "时";targetname += sdf.format(current).substring(14,16) + "分";targetname += 
sdf.format(current).substring(17,19) + "秒";targetname += "_" + username +"_记录.rtf";/* 字节形式读取模板文件内容,将结果转为字符串*/String strpath = getSavePath();String sourname = strpath+"\\"+"模板.rtf";String sourcecontent = "";InputStream ins = null;try{ins = new FileInputStream(sourname);byte[] b = new byte[1024];if (ins == null) {System.out.println("源模板文件不存在");}int bytesRead = 0;while (true) {bytesRead = ins.read(b, 0, 1024); // return final read bytes countsif(bytesRead == -1) {// end of InputStreamSystem.out.println("读取模板文件结束");break;}sourcecontent += new String(b, 0, bytesRead); // convert to string using bytes }}catch(Exception e){e.printStackTrace();}/* 修改变化部分*/String targetcontent = "";/*** 拆分之后的数组元素与模板中的标识符对应关系* array[0]:timetop* array[1]:info* array[2]:idea* array[3]:advice* array[4]:infosend*/String array[] = content.split("~");/*** 2008年11月27日:更新模板之后时间无需自动填充*//*String nowtime = sdf.format(current).substring(0,4) + "年";nowtime += sdf.format(current).substring(5,7) + "月";nowtime += sdf.format(current).substring(8,10) + "日";*/for(int i=0;i<array.length;i++){/*if(i==0){targetcontent = documentDoc.replaceRTF(sourcecontent,nowtime,i);}else{targetcontent = documentDoc.replaceRTF(targetcontent,array[i-1],i);}*/if(i==0){targetcontent = replaceRTF(sourcecontent, array[i], i);}else{targetcontent = replaceRTF(targetcontent, array[i], i);}}/* 结果输出保存到文件*/try {FileWriter fw = new FileWriter(getSavePath()+"\\" + targetname,true);PrintWriter out = new PrintWriter(fw);if(targetcontent.equals("")||targetcontent==""){out.println(sourcecontent);}else{out.println(targetcontent);}out.close();fw.close();System.out.println(getSavePath()+" 该目录下生成文件" + targetname + " 成功");} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}public static void main(String[] args) {// TODO Auto-generated method stubOperatorRTF oRTF = new OperatorRTF();/*** 被替换内容以"~"符号分割,处理的时候将其拆分为数组即可*/String content = 
"2008年10月12日9时-2008年10月12日6时~我们参照检验药品的方法~我们参照检验药品的方法~我们参照检验药品的方法~我们参照检验药品的方法";oRTF.rgModel("cheney",content);}}。