본문 바로가기

spring

크롤링과 셀레니움

package com.example.web;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;
/**
 * Controller exposing a handful of scraping endpoints.
 *
 * <p>The {@code *.json} handlers fetch a remote page (with jsoup, or with a
 * headless Chrome driven by Selenium), pick values out of it with CSS
 * selectors / element locators, and return them as a list of maps. The
 * selectors are tied to the markup of the target sites at the time of
 * writing; when a selector matches nothing the handlers simply return an
 * empty list.
 */
@Controller
public class CrawlingController {

	/**
	 * Crawls the CGV movie chart.
	 *
	 * @return one map per chart entry with the keys {@code rank},
	 *         {@code title} and {@code image} (the {@code src} of the first
	 *         {@code img} in the entry); entries with an empty title are skipped
	 * @throws Exception if the page cannot be fetched
	 */
	@ResponseBody
	@RequestMapping("movie.json")
	public List<HashMap<String,Object>> movie() throws Exception{

		List<HashMap<String,Object>> list=new ArrayList<HashMap<String,Object>>();

		Document doc=Jsoup.connect("http://www.cgv.co.kr/movies/").get();

		Elements es=doc.select(".sect-movie-chart ol");

		for(Element e:es.select("li")){
			// Read the title once; it is both the filter and a stored value.
			String title=e.select(".title").text();
			if(title.isEmpty()){
				// Skip list items that carry no title so no empty entries are returned.
				continue;
			}

			HashMap<String,Object> map=new HashMap<String,Object>();
			map.put("rank", e.select(".rank").text());
			map.put("title", title);
			map.put("image",e.select("img").attr("src"));
			list.add(map);
		}
		return list;

	}

	/**
	 * Returns the view name {@code "list"}.
	 *
	 * @return the string {@code "list"}
	 */
	@RequestMapping("list")
	public String list(){
		return "list";
	}

	/**
	 * Crawls the Naver real-time search keyword list.
	 *
	 * @return one map per keyword with the keys {@code rank} and
	 *         {@code title}; items with an empty keyword are skipped
	 * @throws Exception if the page cannot be fetched
	 */
	@ResponseBody
	@RequestMapping("naver.json")
	public List<HashMap<String,Object>> naver() throws Exception{

		List<HashMap<String,Object>> list=new ArrayList<HashMap<String,Object>>();

		Document doc=Jsoup.connect("https://www.naver.com/").get();

		Elements es=doc.select(".ah_list .ah_l");

		for(Element e:es.select(".ah_item")){
			// Read the keyword once; it is both the filter and a stored value.
			String title=e.select(".ah_k").text();
			if(title.isEmpty()){
				// Skip items without a keyword so no empty entries are returned.
				continue;
			}

			HashMap<String,Object> map=new HashMap<String,Object>();
			map.put("rank", e.select(".ah_r").text());
			map.put("title", title);
			list.add(map);
		}
		return list;

	}

	/**
	 * Crawls the weather list shown on the Daum front page.
	 *
	 * @return one map per matched element with the keys {@code area} and
	 *         {@code temp}
	 * @throws Exception if the page cannot be fetched
	 */
	@ResponseBody
	@RequestMapping("daum.json")
	public List<HashMap<String,Object>> daum() throws Exception{

		List<HashMap<String,Object>> list=new ArrayList<HashMap<String,Object>>();

		Document doc=Jsoup.connect("https://www.daum.net/").get();

		Elements es=doc.select(".info_today .list_weather");

		for(Element e:es.select(".hide")){
			HashMap<String,Object> map=new HashMap<String,Object>();
			map.put("area", e.select(".txt_part").text());
			map.put("temp", e.select(".txt_temper").text());
			list.add(map);
		}
		return list;

	}

	/**
	 * Extracts the titles of the Naver "TRAVEL" theme using Selenium.
	 *
	 * <p>Opens the Naver front page in headless Chrome, opens the theme
	 * category editor, clicks the item whose {@code data-id} is
	 * {@code TRAVEL}, and collects the titles rendered in the theme body.
	 *
	 * @return one map per travel entry with the key {@code title}; empty if
	 *         no {@code TRAVEL} theme item is found
	 * @throws Exception if the browser cannot be started or an expected
	 *         element is missing
	 */
	@ResponseBody
	@RequestMapping("travel.json")
	public ArrayList<HashMap<String,Object>> travel()throws Exception{
		ArrayList<HashMap<String,Object>> list=new ArrayList<HashMap<String,Object>>();

		// Driver setup: location of the chromedriver binary, headless mode.
		System.setProperty("webdriver.chrome.driver", "c:/spring/chromedriver.exe");
		ChromeOptions option=new ChromeOptions();
		option.addArguments("headless");
		WebDriver driver=new ChromeDriver(option);

		try{
			// Load the page to scrape.
			driver.get("https://www.naver.com/");

			// Find the theme category button and click it.
			WebElement btnTheme=driver.findElement(By.id("PM_ID_themecastNavi")).findElement(By.className("ac_btn_cate"));
			btnTheme.click();

			// Locate the selectable theme items.
			WebElement theme = driver.findElement(By.id("PM_ID_themeEditItemList"));
			List<WebElement> themes=theme.findElements(By.className("at_item"));

			for(WebElement e:themes){
				WebElement item=e.findElement(By.className("PM_CL_themeItemSelect"));
				// Constant-first comparison: an item without data-id is skipped
				// instead of raising a NullPointerException.
				if(!"TRAVEL".equals(item.getAttribute("data-id"))){
					continue;
				}

				item.click();

				WebElement travel=driver.findElement(By.id("PM_ID_themecastBody"));
				List<WebElement> travels=travel.findElements(By.className("tl_default"));
				for(WebElement el:travels){
					WebElement title=el.findElement(By.className("td_t"));
					HashMap<String,Object> map=new HashMap<String,Object>();
					map.put("title",title.getText());
					list.add(map);
				}

				// Stop after the first match: the click can change the page, so
				// continuing to use the previously located theme elements risks
				// a StaleElementReferenceException.
				break;
			}
		}finally{
			// Always shut the browser down, even when a lookup or click fails;
			// otherwise every failed request leaves a Chrome process behind.
			driver.quit();
		}
		return list;

	}
}