(PYTHON)데이터 시각화

PYTHON

(PYTHON)데이터 시각화

김만식이 2020. 11. 3. 16:34

www.anaconda.com/products/individual/download-success

Anaconda | Get Started

Anaconda is the birthplace of Python data science. We are a movement of data scientists, data-driven enterprises, and open source communities.

www.anaconda.com

사이트 접속 후 아나콘다 설치

#python 데이터 시각화
#Big Data 
#수집 , 분석 (규칙성) , 활용 
#인공지능 -> 머신러닝 -> 딥러닝

#스크레이핑: 웹사이트에 있는 특정 정보를 추출하는 기술
#크롤링: 프로그램이 웹사이트를 정기적으로 돌며 정보를 추출하는 기술

#urllib: HTTP, FTP 프로토콜을 통해 데이터를 다운받고 사용할 수 있게 도와주는 라이브러리
# request 모듈: 웹사이트에 있는 데이터에 접근하게 해주는 모듈
#urlretrieve 함수: 웹상의 자료를 다운로드할 수 있게 도와주는 함수

# import urllib.request
# url ="https://ssl.pstatic.net/tveta/libs/1307/1307743/fce4e95569c705213e40_20201023101121863.jpg"
# img = "C:/python-img/test.jpg"

# urllib.request.urlretrieve(url,img) #urlretrieve(url, 저장할 경로)
# print("다운로드완료")


#---------------------------------------------------------------------------------------

# import urllib.request
# url ="https://ssl.pstatic.net/tveta/libs/1307/1307743/fce4e95569c705213e40_20201023101121863.jpg"
# imgpath ="C:/python-img/test1.jpg"

# # 파일로 저장하는 과정
# # f = open("a.txt", "w")
# # f.write("테스트로 파일에 내용을 적습니다")
# # f.close()

# # 위 과정을 with 문으로 간단하게 표현한다

# # with open("a.txt", "w") as f:
# #     f.write("테스트로 파일에 내용을 적습니다")

# downlmg = urllib.request.urlopen(url).read()
# with open(imgpath,"wb") as f:   
#     f.write(downlmg)
#     print("완료")
#---------------------------------------------------------------------------------------

# import urllib.request
# import urllib.parse # url을 인코딩하기 위해 불러오는 모듈
# rssURL ="https://www.weather.go.kr/weather/forecast/mid-term-rss3.jsp"
# # 매개변수: 지역별 코드를 지정하는 변수
# # stnId: 지역별 코드를 지정하는 매개변수 이름
# # stnId 값 108: 전국, 109: 서울·경기도
# value = {
#     'stnId':'108'
# }
# params = urllib.parse.urlencode(value)
# url = rssURL + "?" + params
# print("url 값은 "+ url)
# data = urllib.request.urlopen(url).read()
# text= data.decode("UTF-8")
# print(text)
#---------------------------------------------------------------------
# import sys
# import urllib.request as rq
# import urllib.parse as pr # 별칭지정 as

# if len(sys.argv) <= 1 : 
#     print("사용법 : python 인수 1 인수 2")

# regionCode = sys.argv[1]

# rssURL ="https://www.weather.go.kr/weather/forecast/mid-term-rss3.jsp"

# values = {
#     'stnId': regionCode
# }

# params = pr.urlencode(values)
# url = rssURL +"?"+params
# print(url)
# data = rq.urlopen(url).read()
# text = data.decode("utf-8")
# print(text)

#-----------------------------------------------------------------------------------

# from bs4 import BeautifulSoup


# html ="""
# <html>
#     <body>
#     <h1>BeautifulSoup 사용방법</h1>
#     <p>스크레이핑 연습하기</p>
#     <p>원하는 데이터 추출하기</p>
#     </body>
# </html>
# """

# #html 분석하기
# soup = BeautifulSoup(html, "html.parser") 

# #원하는 요소 접근하기

# h1 = soup.html.body.h1
# p1 =soup.html.body.p
# p2 =p1.next_sibling.next_sibling

# print(h1.string)
# print(p1.string)
# print(p2.string)

#-----------------------------------------------------------------------

# from bs4 import BeautifulSoup


# html ="""
# <html>
#     <body>
#     <h1 id="title">스크레이핑 연습</h1>
#     <p id="subtitle">웹페이지를 분석해보기</p>
#     <p>데이터 정제하기..</p>
#     </body>
# </html>
# """

# #html 분석하기
# soup = BeautifulSoup(html, "html.parser") 

# # find 메서드를 이용한 데이터 추출
# title =soup.find(id="title")
# # subtitle = soup.find(id="subtitle")

# # #원하는 요소 접근하기


# # print(title.string)
# # print(subtitle.string)

# from bs4 import BeautifulSoup
# import urllib.request as request

# rssURL ="https://www.weather.go.kr/weather/forecast/mid-term-rss3.jsp"

# HTML = request.urlopen(rssURL)
# # HTML = request.urlopen(rssURL)

# # #html 분석하기
# soup = BeautifulSoup(HTML,"html.parser") 

# #find() 메서드를 사용
# title = soup.find("title").string
# wf = soup.find("wf").string

# print(title)
# print(wf)

# #css: 웹에서 디자인을 담당
# # id => # 으로 표현
# # class => . 으로 표현
# # select_one()
# # css 선택자로 요소 하나를 추출
# #select()
# # 요소 여러 개를 리스트로 추출

# from bs4 import BeautifulSoup
# html = """
# <html>
# <body>
#     <div id="LecList1">
#     <h1>데이터 과학</h1>
#     </div>
#     <div id="LecList2">
#     <h1> 빅데이터 분석강좌</h1>
#     <ul class="subject">
#     <li>R언어 강좌</li>
#     <li>머신러닝을 위한 데이터처리</li>
#     <li>파이썬으로 익히는 딥러닝이론</li>
#     </ul>
#     </div>
# </body>
# </html>
# """

# soup = BeautifulSoup(html,"html.parser") 

# # 하나의 데이터를 뽑아올때
# h1= soup.select_one("div#LecList1 > h1").string
# print(h1)
# div id가 LecList2인 ul class가 subject인 li태그의 정보 모두를 출력
# subject =soup.select("div#LecList2 > ul.subject > li")
# for li in subject :    
#     print("li",li.string)



# from bs4 import BeautifulSoup
# str="""
# <ul>
#     <li id="web">1</li>
#     <li id="mobile">2</li>
#     <li id="datascience">3</li>
#     <li id="database">4</li>
# </ul>
# """
# soup =BeautifulSoup(str,"html.parser")

# web = soup.select_one("ul > li#web").string
# print(web)



# from urllib.parse import urljoin

# baseUrl = "http://www.example.com/html/a.html"

# print(urljoin(baseUrl, "b.html"))
# print(urljoin(baseUrl, "sub/c.html"))
# print(urljoin(baseUrl, "../index.html"))
# print(urljoin(baseUrl, "../image/a.png"))
# print(urljoin(baseUrl, "../css/style.css"))

# #urljoin 두번째 매개 변수에 상대경로가 아닌 절대 경로를
# # 지정하는 경우("http://~")

# print(urljoin(baseUrl, "http://www.other.com/aa"))
# print(urljoin(baseUrl, "http://www.another.com/bb/index.html"))

www.weather.go.kr/weather/forecast/mid-term-rss3.jsp

www.weather.go.kr/weather/forecast/mid-term-rss3.jsp?stnId=184

(아나콘다설치시 설치됨)

beautifulSoup

데이터 분석

다운로드 x

find()

하나의 요소의 값을 읽어옴

html id 속성값을 가지고 올수 있음

find_all()

전체데이터를 읽어오기