Java讀取網頁內容并下載圖片的實例
“真誠服務,讓網絡創造價值”是我們的服務理念,成都創新互聯團隊十多年如一日始終堅持在網站建設領域,為客戶提供優質服務。不管你處于什么行業,助你輕松跨入“互聯網+”時代,PC網站+手機網站+公眾號+小程序定制開發。
很多人在第一次了解數據采集的時候,可能無從下手,尤其是作為一個新手,更是感覺很是茫然,所以,在這里分享一下自己的心得,希望和大家一起分享技術,如果有什么不足,還請大家指正。寫出這篇文章的目的,就是希望大家一起成長,我也相信技術之間沒有高低,只有互補,只有分享,才能使彼此更加成長。
示例代碼:
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.InvalidPathException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Fetches a web page, finds the JPG/PNG/GIF images referenced by its
 * {@code src} and {@code background} attributes, and saves each one into the
 * {@code ./pic/} directory.
 */
public class GetContentPicture {

    /** Directory (relative to the working directory) that receives downloaded images. */
    private static final String DOWNLOAD_DIR = "./pic/";

    /** Attribute name, equals sign and opening quote that introduce an image reference. */
    private static final String IMG_ATTR = "(?:src|SRC|background|BACKGROUND)=['\"]";

    /** File name with one of the supported image extensions. */
    private static final String IMG_FILE = "[\\w-]+\\.(?:jpg|JPG|png|PNG|gif|GIF)";

    /**
     * Relative or root-relative image reference. Group 1 is the reference as
     * written in the page, including a leading "/" when present, so that it can
     * be resolved correctly against the page URL.
     */
    private static final Pattern RELATIVE_IMAGE = Pattern.compile(
            IMG_ATTR + "(/?(?:[\\w-]+/)*" + IMG_FILE + ")['\"]");

    /** Absolute http/https image reference. Group 1 is the complete URL. */
    private static final Pattern ABSOLUTE_IMAGE = Pattern.compile(
            IMG_ATTR + "(https?://(?:[\\w-]+\\.)+[\\w-]+(?::[0-9]+)*(?:/[\\w-]+)*/"
                    + IMG_FILE + ")['\"]");

    /**
     * Downloads a single image into {@code ./pic/}, creating the directory when
     * it is missing and overwriting any existing file with the same name.
     *
     * <p>The download is best-effort: a failure is reported on standard error
     * and the method returns normally, so one broken image does not abort the
     * processing of the remaining ones.
     *
     * @param httpUrl absolute URL of the image; the text after the last "/"
     *                becomes the local file name
     */
    public void getHtmlPicture(String httpUrl) {
        System.out.println("取網絡圖片");

        // "+ 1" drops the slash itself and also copes with a URL that has no slash at all.
        String fileName = httpUrl.substring(httpUrl.lastIndexOf('/') + 1);
        if (fileName.isEmpty() || fileName.equals(".") || fileName.equals("..")) {
            System.err.println("Cannot derive a file name from image URL: " + httpUrl);
            return;
        }

        try {
            Path targetDir = Paths.get(DOWNLOAD_DIR);
            Files.createDirectories(targetDir);

            URL url = new URL(httpUrl);
            // try-with-resources closes the network stream even when the copy fails.
            try (InputStream in = url.openStream()) {
                Files.copy(in, targetDir.resolve(fileName), StandardCopyOption.REPLACE_EXISTING);
            }
            System.out.println("圖片獲取成功");
        } catch (IOException | InvalidPathException e) {
            System.err.println("Failed to download " + httpUrl + ": " + e);
        }
    }

    /**
     * Reads the document behind a URL and returns it as one string.
     *
     * <p>The content is decoded as UTF-8. Line terminators are dropped, so the
     * lines of the document are joined without any separator.
     *
     * @param httpUrl URL of the document to read
     * @return the document text with line terminators removed
     * @throws IOException if the URL is malformed or the content cannot be read
     */
    public String getHtmlCode(String httpUrl) throws IOException {
        URL pageUrl = new URL(httpUrl);
        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(pageUrl.openStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        }
        return content.toString();
    }

    /**
     * Prints the page behind {@code url}, then downloads every image it
     * references: first the relative references (resolved against the page
     * URL), then the absolute http/https ones.
     *
     * @param url URL of the page to scan
     * @throws IOException if the page URL is malformed or the page cannot be read
     */
    public void get(String url) throws IOException {
        String content = this.getHtmlCode(url);
        System.out.println(content);

        URL base = new URL(url);
        Matcher matcher = RELATIVE_IMAGE.matcher(content);
        while (matcher.find()) {
            String reference = matcher.group(1);
            System.out.println(reference);
            // Standard URL resolution handles both "img/a.png" and "/img/a.png",
            // whether or not the page URL ends with a slash.
            this.getHtmlPicture(new URL(base, reference).toString());
        }

        matcher = ABSOLUTE_IMAGE.matcher(content);
        while (matcher.find()) {
            String imageUrl = matcher.group(1);
            System.out.println(imageUrl);
            this.getHtmlPicture(imageUrl);
        }
    }

    /**
     * Demo entry point: downloads the images referenced by the Baidu home page.
     *
     * @param args ignored
     * @throws IOException if the page cannot be read
     */
    public static void main(String[] args) throws IOException {
        String url = "http://www.baidu.com/";
        GetContentPicture gcp = new GetContentPicture();
        gcp.get(url);
    }
}
如有疑問請留言或者到本站社區交流討論,感謝閱讀,希望能幫助到大家,謝謝大家對本站的支持!