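<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.24</version>
</dependency>
<dependency>
    <groupId>technology.tabula</groupId>
    <artifactId>tabula</artifactId>
    <version>1.0.3</version>
</dependency>
<dependency>
    <groupId>com.fasterxml.jackson.core</groupId>
    <artifactId>jackson-databind</artifactId>
    <version>2.9.5</version>
</dependency>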
代碼private static void parse() throws ParseException, IOException {
long start = System.currentTimeMillis();
String src = "C:\\Users\\賬單\\表單.pdf";
String[] argsa = new String[]{"-f=JSON","-p=all", src,"-l"};
//CommandLineApp.main(argsa);
CommandLineParser parser = new DefaultParser();
CommandLine cmd = parser.parse(CommandLineApp.buildOptions(), argsa);
StringBuilder stringBuilder = new StringBuilder();
new CommandLineApp(stringBuilder, cmd).extractTables(cmd);
ObjectMapper objectMapper = new ObjectMapper();
JavaType javaType = objectMapper.getTypeFactory().constructParametricType(ArrayList.class, TabulaPageDTO.class);
objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
Listpages = objectMapper.readValue(stringBuilder.toString(), javaType);
pages.stream().flatMap(p ->p.getData().stream()).forEach(row ->{
row.forEach(a ->System.out.print(a.getText().replaceAll("\r|\n|\t", "").trim() + " "));
System.out.println();
});
long end = System.currentTimeMillis();
long cost = end - start;
System.out.println("解析耗時(shí):" + cost);
}
/**
 * Cell DTO: carries the text content of a single table cell.
 */
public class TabulaAreaDTO {

    /** Text of the cell; {@code null} until {@link #setText(String)} is called. */
    private String text;

    /**
     * Stores the text of this cell.
     *
     * @param text the cell text
     */
    public void setText(String text) {
        this.text = text;
    }

    /**
     * Returns the text of this cell.
     *
     * @return the stored cell text, or {@code null} if none was set
     */
    public String getText() {
        return this.text;
    }
}
//頁(yè)DTO
public class TabulaPageDTO {
private List>data;
public List>getData() {
return data;
}
public void setData(List>data) {
this.data = data;
}
}
說明:好用的話記得點贊收藏哦!!!
你是否還在尋找穩定的海外服務器提供商?創新互聯 www.cdcxhl.cn 海外機房具備T級流量清洗系統配攻擊溯源,準確流量調度確保服務器高可用性,企業級服務器適合批量采購,新人活動首月15元起,快前往官網查看詳情吧。