import requests
from bs4 import BeautifulSoup
import pandas as pd
import pymysql
host_name = "funcoding-db.ca1fydhpobsc.ap-northeast-2.rds.amazonaws.com"
username = ""
password = ""
database_name = "dave_db" # 데이터베이스 이름을 sakila 로 바꿔줘야 합니다.
db = pymysql.connect(
host=host_name, # DATABASE_HOST
port=3306,
user=username, # DATABASE_USERNAME
passwd=password, # DATABASE_PASSWORD
db=database_name, # DATABASE_NAME
charset='utf8'
)
cursor = db.cursor()
cursor.execute("set names utf8")
db.commit()
SQL_QUERY = """
DROP DATABASE IF EXISTS estate_db;
CREATE DATABASE estate_db DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
USE estate_db;
DROP TABLE IF EXISTS estate_db.estate;
CREATE TABLE estate_db.estate (
estate_id INT UNSIGNED NOT NULL AUTO_INCREMENT,
price INT NOT NULL,
year CHAR(4) NOT NULL,
dong VARCHAR(30) NOT NULL,
apartname VARCHAR(30) NOT NULL,
month CHAR(2) NOT NULL,
day CHAR(5) NOT NULL,
space VARCHAR(30) NOT NULL,
address VARCHAR(30) NOT NULL,
addresscode CHAR(5) NOT NULL,
floor CHAR(3) NOT NULL,
PRIMARY KEY(estate_id)
) DEFAULT CHARSET=utf8 COLLATE=utf8_bin
"""
cursor.execute(SQL_QUERY)
db.commit()
servic_key = "WMV3wl%2BU%2BMG%2FOYQK3Wv99q1H79wjvVwIwHEjvrDV3K4gG2h9P1%2BKiB%2FFa1QoYI0yxBpJ3sm1L0OAEam9Rp2sbw%3D%3D" # 인증키
locate_code = '11380' # 서울 은평구
contract_date = '201708' # 실거래가 기간
request_url = "http://openapi.molit.go.kr:8081/OpenAPI_ToolInstallPackage/service/rest/RTMSOBJSvc/getRTMSDataSvcAptTrade?LAWD_CD=" + locate_code + "&DEAL_YMD=" + contract_date + "&serviceKey=" + servic_key
pparams = {'LAWD_CD': 'value1', 'DEAL_YMD': 'value'}
response = requests.get(request_url)
# print(response.content.decode('utf-8'))
soup = BeautifulSoup(response.content.decode('utf-8'), 'xml')
items = soup.find_all('item')
items
from urllib.request import urlopen
import xml.etree.ElementTree as etree
import pandas as pd
key = "WMV3wl%2BU%2BMG%2FOYQK3Wv99q1H79wjvVwIwHEjvrDV3K4gG2h9P1%2BKiB%2FFa1QoYI0yxBpJ3sm1L0OAEam9Rp2sbw%3D%3D" # 인증키
loc = '11380' # 서울 은평구
ym = '201708'
request_url = "http://openapi.molit.go.kr:8081/OpenAPI_ToolInstallPackage/service/rest/RTMSOBJSvc/getRTMSDataSvcAptTrade?LAWD_CD=" + loc + "&DEAL_YMD=" + ym + "&serviceKey=" + key
response = urlopen(request_url)
xml_data = response.read().decode('UTF-8') # 데이터를 가져오고 UTF-8 String으로 변환
print(xml_data)
root = etree.fromstring(xml_data)
estate_data = list()
for item in root.iter('item'):
estate_item1 = child.find('거래금액').text
estate_item2 = child.find('년').text
estate_item3 = child.find('법정동').text
estate_item4 = child.find('아파트').text
estate_item5 = child.find('월').text
estate_item6 = child.find('일').text
estate_item7 = child.find('전용면적').text
estate_item8 = child.find('지번').text
estate_item9 = child.find('지역코드').text
estate_item10 = child.find('층').text
estate_data.append(
[estate_item1, estate_item2, estate_item3, estate_item4, estate_item5, estate_item6, estate_item7, estate_item8,
estate_item9, estate_item10])
columns = ['거래금액', '년', '법정동', '아파트', '월', '일', '전용면적', '지번', '지역코드', '층']
df = pd.DataFrame(estate_data, columns=columns)
estate_data = list()
for item in items:
estate_item = list()
estate_item1 = item.find('거래금액').text.strip()
estate_item2 = item.find('년').text.strip()
estate_item3 = item.find('법정동').text.strip()
estate_item4 = item.find('아파트').text.strip()
estate_item5 = item.find('월').text.strip()
estate_item6 = item.find('일').text.strip()
estate_item7 = item.find('전용면적').text.strip()
estate_item8 = item.find('지번').text.strip()
estate_item9 = item.find('지역코드').text.strip()
estate_item10 = item.find('층').text.strip()
"""
sql = '''
INSERT INTO estate_db.estate
(price, year, dong, apartname, month, day, space, address, addresscode, floor)
VALUES
(%d, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s')
''' % (int(estate_item1.replace(',', '')), estate_item2, estate_item3, estate_item4, estate_item5, estate_item6, estate_item7, estate_item8,
estate_item9, estate_item10)
# print(sql)
cursor.execute(sql)
db.commit()
"""
estate_item.append(int(estate_item1.replace(',', '')))
estate_item.append(estate_item2)
estate_item.append(estate_item3)
estate_item.append(estate_item4)
estate_item.append(estate_item5)
estate_item.append(estate_item6)
estate_item.append(estate_item7)
estate_item.append(estate_item8)
estate_item.append(estate_item9)
estate_item.append(estate_item10)
estate_data.append(estate_item)
print(cursor.lastrowid)
estate_data
columns = ['거래금액', '년', '법정동', '아파트', '월', '일', '전용면적', '지번', '지역코드', '층']
df = pd.DataFrame(estate_data, columns=columns)
df
df.to_csv('estate.csv', sep=',', encoding='utf-8')
csv_test = pd.read_csv('estate.csv')
csv_test