当前位置:天才代写 > tutorial > 其他教程 > R可视化分析链家网南京楼市数据

R可视化分析链家网南京楼市数据

2017-12-03 08:00 星期日 所属: 其他教程 浏览:1280

一、10个数据分析维度:各一级区域成交量排行和销售均价情况;热门成交房屋户型绘饼图分析;链家南京房屋年代统计分析;链家南京房屋交易议价空间统计(议价空间 = 挂牌价 – 成交价);链家南京楼层房价统计;最贵的top10小区;链家每日销售额;平均成交总价与平均成交单价;房屋成交周期;报价和成交价差别
# Article section heading ("二、数据处理惩罚及可视化阐明"):
# Part 2 - data processing and visual analysis.

# Load the data.
data <- read.csv("D:/操练集/南京链家楼市阐明/dpLianjiabydistrict.csv",
                 header = TRUE, fileEncoding = "gbk")
dim(data)  # per the article: 30515 rows, 24 columns

# Take a look at the data.
str(data)

# 1. Daily deal volume: count the rows per deal date and draw a bar chart.
daySaleAmount <- with(data, table(dealDate))
barplot(daySaleAmount, xlab = "生意业务时间", ylab = "成交量(套)",
        main = "链家南京成交量统计(2012.5.3-2017.7.6)")

R可视化分析链家网南京楼市数据

从2016年12月开始至2017年2月末,南京楼市迎来买买买的高潮,成交量爆增!2012.5.3-2017.7.6近五年间,南京楼市成交量在2016年底开始迎来买房高峰。
# 2. Deal-volume ranking and average unit price by first-level district.
# Take the first-level district and store it in a new variable, `zone`.
data$district <- as.character(data$district)

# Return the text before the first "/" of a single district string `x`.
zonedata <- function(x) {
  strsplit(x, split = "/")[[1]][1]
}

# vapply() guarantees exactly one string per row and drops the element names
# that sapply() would attach to the result.
zone1 <- vapply(data$district, zonedata, character(1), USE.NAMES = FALSE)
data$zone <- factor(zone1)

# Count deals per zone, largest first.
zoneSales <- with(data, table(zone))
zoneSales <- sort(zoneSales, decreasing = TRUE)

# Deal counts per first-level district as a data frame with columns `zone`
# and `Freq` (shown as Figure 2 in the article).
zoneSales <- as.data.frame(zoneSales)

R可视化分析链家网南京楼市数据

鼓楼区成交量较大,以8350套成交套数雄居第一!秦淮区以6388套成交套数位于NO2!
# Plot deal volume (bars, left axis) and average unit price (line, right axis)
# for every first-level district.
# NOTE(review): twoord.plot() is not a base R function; it appears to be
# plotrix::twoord.plot, so plotrix must be attached before this section runs.

# Aggregate the data: mean unit price per zone, as a vector named by zone.
tempPrices <- aggregate(data$unitPrice, by = list(zone = data$zone), mean)
zoneUnitPrices <- setNames(tempPrices$x, as.character(tempPrices$zone))
# A single district can be looked up by name, e.g. zoneUnitPrices["建邺"].
# NOTE(review): the original rounds only the second entry; kept as-is.
zoneUnitPrices[2] <- round(zoneUnitPrices[2], 0)

# zoneSales is sorted by deal count while tempPrices is in factor-level order,
# so the prices must be looked up by zone *name* to line up with the bars.
# Indexing with the factor itself would use its integer codes instead.
zoneNames <- as.character(zoneSales$zone)
zoneCount <- nrow(zoneSales)
xpos <- seq_len(zoneCount)
ry <- unname(zoneUnitPrices[zoneNames])
ly <- zoneSales$Freq

twoord.plot(lx = xpos, ly = ly, lylim = c(0, max(ly) * 1.1),
            rylim = c(0, max(ry) * 1.1), main = "链家南京各区域生意业务环境",
            xlab = "区域", ylab = "成交量(套)", rylab = "均价(元)",
            rx = xpos, ry = ry,
            type = c("bar", "b"), xlim = c(0, zoneCount + 1))

# Label the bars with the deal counts.
text(xpos, ly + 300, ly)
# Label the price line: rescale prices onto the left axis to place the text.
rypos <- round(ry * max(ly) / max(ry))
text(xpos, rypos + 600, round(ry), col = "red")

R可视化分析链家网南京楼市数据

# Caption of the preceding chart in the article: "图3" (Figure 3).

# Top 6 best-selling layouts, drawn as a pie chart.
layoutSaleAmount <- with(data, table(layout))
layoutSaleAmount <- sort(layoutSaleAmount, decreasing = TRUE)

# Work on a plain named vector so a seventh "other" slice can be appended
# without assigning past the end of a 1-D table.
layoutCounts <- setNames(as.vector(layoutSaleAmount), names(layoutSaleAmount))
# Guard against data with fewer than six distinct layouts.
topCount <- min(6L, length(layoutCounts))
topIndex <- seq_len(topCount)
topLayoutSaleAmount <- c(
  layoutCounts[topIndex],
  "其他" = sum(layoutCounts) - sum(layoutCounts[topIndex])
)

# Slice labels of the form "<layout>:<share>%", share rounded to 2 decimals.
pctTopLayoutSaleAmount <- round(
  topLayoutSaleAmount / sum(topLayoutSaleAmount) * 100, 2
)
pctTopLayoutSaleAmount <- paste0(
  names(topLayoutSaleAmount), ":", pctTopLayoutSaleAmount, "%"
)

par(mfrow = c(1, 1))
pie(topLayoutSaleAmount, labels = pctTopLayoutSaleAmount,
    col = rainbow(length(topLayoutSaleAmount)),
    main = "链家南京热门成交户型Top6(2012.5.3-2017.7.6)")

R可视化分析链家网南京楼市数据

# Deal volume by construction era of the building.
data$years <- as.numeric(as.character(data$years))

# Bucket the construction year into eras: before 1990, 1990-1999, 2000-2009,
# and 2010 onwards. Rows with a missing year stay NA.
data$hood <- cut(
  data$years,
  breaks = c(-Inf, 1990, 2000, 2010, Inf),
  labels = c("90年前", "90年月", "00年月", "10年月"),
  right = FALSE
)
# Order the eras from newest to oldest for display.
data$hood <- ordered(
  as.character(data$hood),
  levels = c("10年月", "00年月", "90年月", "90年前")
)

hoodSales <- with(data, table(hood))
# barplot() returns the bar midpoints; use them to centre the count labels
# instead of hand-tuned x coordinates.
barMids <- barplot(hoodSales, xlab = "衡宇年月", ylab = "成交量(套)",
                   ylim = c(0, max(hoodSales) * 1.1),
                   main = "链家南京衡宇年月统计(2012.5.3-2017.7.6)")
text(barMids, hoodSales + 400, hoodSales)

R可视化分析链家网南京楼市数据

# 5. Bargaining room = listing price - deal price.
gapPrices <- data$quotedPrice - data$totalPrice
# Mean bargaining room per deal date.
dayGapPrices <- aggregate(gapPrices, by = list(dealDate = data$dealDate), mean)

# ts() takes start/end as c(year, period within the year). With
# frequency = 365 the period is the day of the year, so convert the calendar
# dates rather than passing c(year, month, day).
# NOTE(review): ts() treats the observations as evenly spaced and recycles or
# truncates them to fill start..end; dates with no deals are not accounted
# for here - confirm this is acceptable.
tsGapPrices <- ts(
  dayGapPrices[[2]],
  frequency = 365,
  start = c(2012, as.integer(format(as.Date("2012-05-03"), "%j"))),
  end = c(2017, as.integer(format(as.Date("2017-07-06"), "%j")))
)

# Two stacked panels; the upper one shows the raw daily points.
par(mfrow = c(2, 1), mar = c(2, 4, 3, 2))
plot.ts(tsGapPrices, type = "p", xlab = NULL, xaxt = "n", ylab = NULL,
        main = "链家南京衡宇生意业务议价空间统计(2012.5.3-2017.7.6)")

# Decompose the series into its components.
decTsGapPrices <- decompose(tsGapPrices)
# Margins for the lower panel.
# NOTE(review): the article's code stops here; whatever was drawn in the
# second panel is not present in the source.
par(mar = c(3, 4, 0, 2))

R可视化分析链家网南京楼市数据

# Average unit price by floor, for homes without and with an elevator.
data$loft <- as.numeric(as.character(data$loft))

# A home counts as "no elevator" when it is marked so, or when the elevator
# field is missing and `loft` is at most 7. The mask is built without NAs so
# that row subsetting never produces all-NA rows, and so that homes with a
# missing elevator field and loft above 7 fall into the elevator group.
noElevator <- data$elevator %in% "无电梯" |
  (is.na(data$elevator) & !is.na(data$loft) & data$loft <= 7)

# Homes without an elevator.
elderdata <- data[noElevator, ]
# Homes with an elevator.
newerdata <- data[!noElevator, ]

# Mean deal unit price per `stair` value, rounded to whole units.
stairElderPrices <- aggregate(elderdata$unitPrice,
                              by = list(stair = elderdata$stair), mean)
stairNewerPrices <- aggregate(newerdata$unitPrice,
                              by = list(stair = newerdata$stair), mean)
stairElderPrices[, 2] <- round(stairElderPrices[, 2], 0)
stairNewerPrices[, 2] <- round(stairNewerPrices[, 2], 0)
maxPrice <- max(stairElderPrices[, 2], stairNewerPrices[, 2])

# First series: no elevator (blue).
plot(stairElderPrices[, 1], stairElderPrices[, 2], ylim = c(0, maxPrice * 1.1),
     main = "链家南京楼层房价统计", type = "b", col = "blue",
     xlab = "楼层", ylab = "生意业务均价(元)")
text(stairElderPrices[, 1], stairElderPrices[, 2] + 1000,
     stairElderPrices[, 2], col = "blue")

# Second series: with elevator (red), overlaid on the same device. Its x
# values come from its own aggregate so x and y always have the same length.
par(new = TRUE)
plot(stairNewerPrices[, 1], stairNewerPrices[, 2], ylim = c(0, maxPrice * 1.1),
     type = "b", col = "red", xlab = "楼层", ylab = "生意业务均价(元)")
text(stairNewerPrices[, 1], stairNewerPrices[, 2] + 1000,
     stairNewerPrices[, 2], col = "red")

# bty = "o" draws the legend box (the documented values are "o" and "n").
legend("bottomright", legend = c("无电梯", "有电梯"), col = c("blue", "red"),
       bty = "o", bg = "aliceblue", pch = c(15, 15))
par(new = FALSE)

R可视化分析链家网南京楼市数据

# Caption of the preceding chart in the article: "链家南京楼层房价统计"
# (price by floor).

# 7. The ten most expensive residential communities (blocks).
# Mean unit price per block, highest first.
blockPrices <- aggregate(data$unitPrice, by = list(block = data$block), mean)
blockPrices <- blockPrices[order(blockPrices[, 2], decreasing = TRUE), ]
names(blockPrices)[2] <- "unitPrice"

# Total number of blocks.
nrow(blockPrices)

# Top 10 most expensive blocks; head() returns fewer rows instead of NA rows
# when there are not ten blocks.
head(blockPrices, 10)

R可视化分析链家网南京楼市数据

# Daily sales value: sum of deal prices per deal date.
daySales <- aggregate(data$totalPrice, by = list(dealDate = data$dealDate), sum)
# NOTE(review): the title's date range (2017.3.15-2017.7.3) differs from the
# 2012.5.3-2017.7.6 range used in the other chart titles - confirm which
# applies to this data set.
plot(daySales[, 1], daySales[, 2], type = "b",
     xlab = "Deal Date", ylab = "Sale Value",
     main = "Lianjia Day Sale Value(2017.3.15-2017.7.3)")

R可视化分析链家网南京楼市数据

# Caption of the preceding chart in the article: "链家逐日销售额"
# (daily sales value).

# Average total deal price.
mean(data$totalPrice)

# Average deal unit price.
mean(data$unitPrice)

R可视化分析链家网南京楼市数据

# Transaction cycle: mean of the `tranCycle` column per deal date.
tranCycle <- aggregate(data$tranCycle, by = list(dealDate = data$dealDate), mean)
plot(tranCycle[, 1], tranCycle[, 2], type = "b",
     xlab = "Deal Date", ylab = "Transaction Cycle",
     main = "Lianjia Transaction Cycle(2017.3.15-2017.7.3)")

R可视化分析链家网南京楼市数据

# Caption of the preceding chart in the article: "衡宇成交周期"
# (transaction cycle).

# Difference between the listing price and the deal price, averaged per
# deal date.
gapPrice <- data$quotedPrice - data$totalPrice
gap <- aggregate(gapPrice, by = list(dealDate = data$dealDate), mean)
plot(gap[, 1], gap[, 2], type = "b",
     xlab = "Deal Date", ylab = "Gap Price",
     main = "Lianjia Gap Price(2017.3.15-2017.7.3)")

R可视化分析链家网南京楼市数据

报价和成交价差别
文末彩蛋:Hadley 大神的个人主页:http://hadley.nz/ 《R for Data Science》 利用R做数据科学的核心工具:http://r4ds.had.co.nz/ 《ggplot2:elegant graphics for data analysis》 利用ggplot2创建图形:https://www.amazon.com/dp/0387981403/ref=cm_sw_su_dp?tag=ggplot2-20 《Advanced R》 R的运行机制:http://adv-r.had.co.nz/ 《R packages》 R的软件工程实践,并利用包打包、记录和测试代码:http://r-pkgs.had.co.nz/
欢迎加入本站公开兴趣群:商业智能与数据分析群。兴趣范围包括各类让数据产生价值的办法,实际应用案例分享与讨论,分析工具,ETL工具,数据仓库,数据挖掘工具,报表系统等全方位知识。QQ群:81035754

 

    关键字:

天才代写-代写联系方式