[button color="primary" icon="" url="https://own.masheng.fun/data/nanjing.csv" type=""]nanjing.csv[/button]
import numpy as np
import pandas as pd
# Column labels applied to the listings file when it is loaded.
names = [
    'city', 'addr', 'rule', 'price',
    'size', 'additional', 'zdate', 'unit',
]
# Load the rental listings and preview the first rows.
df = pd.read_csv('/data/nanjing.csv', names=names)
df.head()
|
city |
addr |
rule |
price |
size |
additional |
zdate |
unit |
---|
0 |
南京 |
鼓楼-青云巷 |
3室1厅1卫 |
900 |
12㎡ |
电视 冰箱 洗衣机 空调 热水器 床 暖气 宽带 衣柜 天然气 |
2020-03-08 |
元/月(季付价) |
---|
1 |
南京 |
鼓楼-南东瓜市 |
2室0厅1卫 |
1600 |
20㎡ |
NaN |
2020-03-05 |
元/月 |
---|
2 |
南京 |
鼓楼-五塘和园 |
4室1厅1卫 |
1390 |
12㎡ |
冰箱 洗衣机 空调 热水器 床 宽带 衣柜 |
2020-03-05 |
元/月(月付价) |
---|
3 |
南京 |
鼓楼-凤凰二村 |
1室1厅1卫 |
2000 |
27㎡ |
NaN |
2020-03-06 |
元/月 |
---|
4 |
南京 |
鼓楼-中海桃源里 |
4室1厅1卫 |
1090 |
10㎡ |
冰箱 洗衣机 空调 热水器 床 宽带 衣柜 |
2020-03-08 |
元/月(月付价) |
---|
# Forward-fill missing values: each NaN takes the value from the previous row.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling.
df = df.ffill()
# Show any rows that still contain a missing value (expected to be empty).
df[df.isnull().any(axis=1)]
|
city |
addr |
rule |
price |
size |
additional |
zdate |
unit |
---|
# Raw strings keep "\d" from being treated as an (invalid) string escape;
# expand=False makes str.extract return a Series instead of a one-column
# DataFrame.
# Hall count: the first digit that has any character in front of it
# (in "3室1厅1卫" that is the 1 in front of 厅).
hall=df['rule'].str.extract(r'.(\d)', expand=False)
# Room count: the first digit in the string.
room=df['rule'].str.extract(r'(\d)', expand=False)
room=room.astype('int64')
hall=hall.astype('int64')
# Collapse the layout into a single number: (rooms + 2 * halls) / 2.
df['rule']=(room+(hall*2))/2
# Keep only the first run of digits of the area text (e.g. "12㎡" -> 12).
df['size']=df['size'].str.extract(r'(\d+)', expand=False)
df['size']=df['size'].astype('int64')
def additionals(a):
    """Return the number of space-separated items in the string ``a``.

    The string is split on single space characters, so an empty string
    yields 1 and consecutive spaces produce empty items that are counted
    too.

    Args:
        a: Text holding items separated by a single space each.

    Returns:
        The number of pieces obtained by splitting ``a`` on ``' '``.
    """
    return len(a.split(' '))
# Replace each amenities string with the number of items it lists.
df['additional']=df['additional'].apply(additionals)
# Average the numeric columns per listing date. numeric_only=True skips the
# remaining text columns (city, addr, unit); without it, pandas 2.0 and later
# raise a TypeError when asked to average them.
data=df.groupby(df['zdate']).mean(numeric_only=True)
data.tail()
|
rule |
price |
size |
additional |
---|
zdate |
|
|
|
|
---|
2020-03-05 |
2.408271 |
1937.629323 |
49.803008 |
7.490977 |
---|
2020-03-06 |
2.412957 |
1918.367250 |
52.073132 |
7.460254 |
---|
2020-03-07 |
2.332008 |
2004.573890 |
50.938370 |
7.460570 |
---|
2020-03-08 |
2.511217 |
1770.254929 |
35.565942 |
7.208702 |
---|
2020-03-09 |
2.722754 |
1530.810778 |
30.033533 |
7.217964 |
---|
# Number of listing rows recorded for each date.
counts = df.groupby(by='zdate').size()
# Preview the last few dates.
counts.tail()
zdate
2020-03-05 1330
2020-03-06 1258
2020-03-07 1509
2020-03-08 2942
2020-03-09 835
dtype: int64
from sklearn.linear_model import LinearRegression
# load_boston is not used in the code shown here and was removed in
# scikit-learn 1.2; guard the import so the script still runs on newer
# releases while keeping the name defined.
try:
    from sklearn.datasets import load_boston
except ImportError:
    load_boston = None
from sklearn.model_selection import train_test_split
# Features: the per-date averages; target: the per-date listing counts.
X=data
y=counts
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2,random_state=125)
# Fit the linear regression model on the training split.
clf = LinearRegression().fit(X_train,y_train)
print('建立的LinearRegression模型为:','\n',clf)
建立的LinearRegression模型为:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
# Predict the target for the held-out rows.
y_pred = clf.predict(X_test)
# Report how many predictions were produced.
print(y_pred.shape[0])
# Show at most the first 20 predicted values.
print('预测前20个结果为:', '\n', y_pred[:20], sep=' ')
26
预测前20个结果为:
[ 64.04629768 72.32580809 266.94981422 254.43703779 134.24151951
0.80319776 122.11897184 70.2876959 5.53085201 232.45575409
185.17606994 225.12269869 87.68120282 221.29098145 150.57651981
-27.12364545 522.41034522 203.97223093 203.90648176 66.07014051]
dvjjxjznnu
不错不错,我喜欢看 https://www.ea55.com/
zlbkfggnon
看的我热血沸腾啊https://www.237fa.com/
rpbqlpeilb
叼茂SEO.bfbikes.com