Analysing mobility traces

Home

A set of steps to analysis/observe the mobility traces from Wifi APs in a private entity. Dataset is not public. Data is anonymized with random codes.

In [3]:
import pandas as pd
import numpy as np
import time
import networkx as nx
import collections
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

import holoviews as hv
from holoviews import opts, dim
hv.extension('bokeh')

Preparing data with Pandas

In [3]:
df = pd.read_csv("proximity.txt","	")
print(df.head(5))
             date_time                        hashed_sta_eth_mac  \
0  2020-10-01 10:45:34  0901F7F6F37FF77B2D177EF92DE141191B1DCDBD   
1  2020-10-01 10:45:34  0970CA92F7488C749F7497FAE333A6ADEA69FCE4   
2  2020-10-01 10:45:34  150380339F956912D7F41ED2F4A7A7ED5BBEE51E   
3  2020-10-01 10:45:34  15AA50487C9B326BFBF0B8F699681C3AD53446BA   
4  2020-10-01 10:45:34  1A3047D4B2F72BE8537E69680C04B276B8B253FA   

         ap_name radio_mac_addr  rssi_val          target_type  \
0   AP-MO-DB-W22   1864721957E0       -59  TARGET_TYPE_STATION   
1    AP-MA-A-W43   9C1C12840130       -73  TARGET_TYPE_STATION   
2   AP-JO-10-W55   18647216AF10       -54  TARGET_TYPE_STATION   
3  AP-ITD-PB-W07   18647217D0C0       -46  TARGET_TYPE_STATION   
4   AP-JO-60-W09   18647217E020       -44  TARGET_TYPE_STATION   

                            json_name          ts          apiDateTime  
0  proximity_2020-10-01 10:46:25.json  1601541934  2020-10-01 10:46:25  
1  proximity_2020-10-01 10:46:25.json  1601541934  2020-10-01 10:46:25  
2  proximity_2020-10-01 10:46:25.json  1601541934  2020-10-01 10:46:25  
3  proximity_2020-10-01 10:46:25.json  1601541934  2020-10-01 10:46:25  
4  proximity_2020-10-01 10:46:25.json  1601541934  2020-10-01 10:46:25  
In [4]:
df.index = pd.DatetimeIndex(df.date_time)
df = df.drop(columns=['date_time',"json_name","apiDateTime"])
df = df.rename(columns={"hashed_sta_eth_mac": "hashedmac"})


#Number of different hashed macs in total
(unique, counts) = np.unique(df.hashedmac, return_counts=True)
print("Total Hashed Mac Samples: %i  with unique elements: %i - diff. %i"%(df.shape[0],len(unique),df.shape[0]-len(unique)))

df["one"] = np.ones(df.shape[0])
print(df.head(5))
Total Hashed Mac Samples: 1223997  with unique elements: 127824 - diff. 1096173
                                                    hashedmac        ap_name  \
date_time                                                                      
2020-10-01 10:45:34  0901F7F6F37FF77B2D177EF92DE141191B1DCDBD   AP-MO-DB-W22   
2020-10-01 10:45:34  0970CA92F7488C749F7497FAE333A6ADEA69FCE4    AP-MA-A-W43   
2020-10-01 10:45:34  150380339F956912D7F41ED2F4A7A7ED5BBEE51E   AP-JO-10-W55   
2020-10-01 10:45:34  15AA50487C9B326BFBF0B8F699681C3AD53446BA  AP-ITD-PB-W07   
2020-10-01 10:45:34  1A3047D4B2F72BE8537E69680C04B276B8B253FA   AP-JO-60-W09   

                    radio_mac_addr  rssi_val          target_type          ts  \
date_time                                                                       
2020-10-01 10:45:34   1864721957E0       -59  TARGET_TYPE_STATION  1601541934   
2020-10-01 10:45:34   9C1C12840130       -73  TARGET_TYPE_STATION  1601541934   
2020-10-01 10:45:34   18647216AF10       -54  TARGET_TYPE_STATION  1601541934   
2020-10-01 10:45:34   18647217D0C0       -46  TARGET_TYPE_STATION  1601541934   
2020-10-01 10:45:34   18647217E020       -44  TARGET_TYPE_STATION  1601541934   

                     one  
date_time                 
2020-10-01 10:45:34  1.0  
2020-10-01 10:45:34  1.0  
2020-10-01 10:45:34  1.0  
2020-10-01 10:45:34  1.0  
2020-10-01 10:45:34  1.0  
In [5]:
#Samples by day
df.resample("B")["one"].sum().plot(kind="bar")
plt.show()

Some observations:

  • Missing days: 03, 04, and 10, 11 ?? >>> Weekends!
  • 12/10 national holiday
  • 15/10 data taken in this day at 10.30am aprox.

 In next cases, we analyse the movements/handovers among buildings in only one day.

In [6]:
firstday = df.index.min()
fdy = firstday.strftime("%m-%d-%Y")
ldy = df.index.max()
# pd.date_range(fdy,ldy)
In [7]:
macs = df[fdy].groupby("hashedmac")["one"].sum()
dfmacs = df[fdy]
dfmacs = dfmacs[dfmacs.hashedmac.isin(macs.index)]

# In lmacs, we have users' handovers
lmacs = dfmacs.groupby("hashedmac")["ap_name"].apply(list)
print(dfmacs.head(3))
print("*"*40)
print(lmacs.head(3))
                                                    hashedmac       ap_name  \
date_time                                                                     
2020-10-01 10:45:34  0901F7F6F37FF77B2D177EF92DE141191B1DCDBD  AP-MO-DB-W22   
2020-10-01 10:45:34  0970CA92F7488C749F7497FAE333A6ADEA69FCE4   AP-MA-A-W43   
2020-10-01 10:45:34  150380339F956912D7F41ED2F4A7A7ED5BBEE51E  AP-JO-10-W55   

                    radio_mac_addr  rssi_val          target_type          ts  \
date_time                                                                       
2020-10-01 10:45:34   1864721957E0       -59  TARGET_TYPE_STATION  1601541934   
2020-10-01 10:45:34   9C1C12840130       -73  TARGET_TYPE_STATION  1601541934   
2020-10-01 10:45:34   18647216AF10       -54  TARGET_TYPE_STATION  1601541934   

                     one  
date_time                 
2020-10-01 10:45:34  1.0  
2020-10-01 10:45:34  1.0  
2020-10-01 10:45:34  1.0  
****************************************
hashedmac
0008C7B255C637F7A986A208064F9FD46ED474A1                                       [AP-MO-DB-W51]
000E406B899AB60390F1BD2574038295DFABF22C                                       [AP-IE-IE-W01]
000EDE8D7FA33EE5D9CE444CDA4A70D1829D0E39    [AP-IE-IE-W06, AP-IE-IE-W06, AP-IE-IE-W06, AP-...
Name: ap_name, dtype: object
In [8]:
userHands = np.array([len(x) for x in lmacs.values])
(unique, counts) = np.unique(userHands, return_counts=True)

print("Mean number of handovers: ", np.mean(userHands))
print("Std handovers: ", np.std(userHands))
print("Median handovers: ", np.median(userHands))

#Histogram of users' handover
fig, ax = plt.subplots()
ax.bar(unique,counts)
plt.show()
#zoom in
fig, ax = plt.subplots()
ax.bar(unique[1:],counts[1:])
plt.show()
Mean number of handovers:  7.38457046930881
Std handovers:  12.91418594747744
Median handovers:  1.0

 Cumulative distribution function

In [9]:
#1
x = np.sort(userHands)
y = np.array(range(len(userHands)))/float(len(userHands))

fig, ax = plt.subplots()
ax.set_xlim(0,50)
ax.plot(x,y)
plt.title('Cumulative distribution function')

ax.set_xlabel('number of handovers')
ax.set_ylabel('$p$')

pstwo = np.where(x==2)[0][0]
ax.axhline(y=y[pstwo], linestyle='dashed', alpha=0.5,color='#004f6d')
ax.text(x=x[pstwo], y=y[pstwo]-0.05, s="%.1f%%"%(y[pstwo]*100.0), alpha=0.7, color='#004f6d')
plt.xticks(sorted(list(plt.xticks()[0]) + [x[pstwo]]))


pseightty= np.where(y>0.8)[0][0]
ax.axvline(x=x[pseightty], linestyle='dashed', alpha=0.5,color='#11008d')
ax.text(x=x[pseightty]-4, y=y[pseightty], s='80%', alpha=0.7, color='#11008d')
plt.xticks(sorted(list(plt.xticks()[0]) + [x[pseightty]]))

plt.show()

Sankey diagram

In [10]:
nodes,edges = [],[]
for aps in lmacs.values:
      nodes=nodes+aps
      c = 0
      i = len(aps)
      while c+1<i: 
          edges.append((aps[c],aps[c+1]))
          c+=1
      if c==0: edges.append((aps[c],aps[c]))
  
nodes = list(set(nodes)) #remove duplicate

# We get the acronym of the building with the AP name.
attr_building={}
for ap in nodes:
    name  = ap.split("-")[1]
    attr_building[ap] = name
names_building = list(set(attr_building.values()))
#edges by building
eall = [(x.split("-")[1],y.split("-")[1]) for (x,y) in edges if x.split("-")[1]!=y.split("-")[1]]
attr_edges = dict(collections.Counter(eall))
  
#Data preparation for Holoview model.    
source,target,value=[],[],[]
for item in attr_edges.items():
    source.append(item[0][0])
    target.append(item[0][1])
    value.append(item[1])
    
edgeBuilding = pd.DataFrame({"source":source,"target":target,"value":value})
edgeBuilding["target"] = edgeBuilding["target"] +"_"

sources_order = edgeBuilding.groupby("source").sum().sort_values("value")[::-1]
targets_order = edgeBuilding.groupby("target").sum().sort_values("value")[::-1]

allnodes = np.concatenate((sources_order.index.values,targets_order.index.values))

nodesSankey = hv.Dataset(pd.DataFrame({"buildings":allnodes}))

value_dim = hv.Dimension('value', unit='dev.')
betBuildings = hv.Sankey((edgeBuilding, nodesSankey),["source","target"],vdims=value_dim)

betBuildings.opts(
    opts.Sankey(labels='buildings', label_position='right', width=900, height=500, cmap='Set1',
                edge_color=dim('source').str(), node_color=dim('buildings').str()))
Out[10]:

Chord plots

1. with user movements among all buildings (including self buildings)

In [11]:
nodes,edges = [],[]
for aps in lmacs.values:
      nodes=nodes+aps
      c = 0
      i = len(aps)
      while c+1<i: 
          edges.append((aps[c],aps[c+1]))
          c+=1
      if c==0: edges.append((aps[c],aps[c]))
  
nodes = list(set(nodes)) #remove duplicate
      
# We get the list of buildings using the AP name
attr_building={}
for ap in nodes:
    name  = ap.split("-")[1]
    attr_building[ap] = name
names_building = list(set(attr_building.values()))
#edges by building
eall = [(x.split("-")[1],y.split("-")[1]) for (x,y) in edges]
#Frequency of movements among buildings
attr_edges = dict(collections.Counter(eall))
print(list(attr_edges.items())[:5])
[(('MO', 'MO'), 12025), (('IE', 'IE'), 3811), (('MA', 'MA'), 13640), (('CJ', 'CJ'), 576), (('GC', 'GC'), 6469)]
In [12]:
#Data preparation for Holoview model.
source,target,value=[],[],[]
for item in attr_edges.items():
    source.append(item[0][0])
    target.append(item[0][1])
    value.append(item[1])
    
edgeBuilding = pd.DataFrame({"source":source,"target":target,"value":value})
nodeBuilding = hv.Dataset(pd.DataFrame({"building":names_building}))

chord = hv.Chord((edgeBuilding, nodeBuilding))
chord.opts(
    opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source'), 
               labels='building', node_color=dim('building').str(),height=500,width=500))
Out[12]:

Probability matrix

In [37]:
names_building = sorted(names_building)
mapIdBuilding = dict(zip(names_building,range(len(names_building))))
freq = np.zeros((len(names_building),len(names_building)))
for b1,b2 in attr_edges:
    freq[mapIdBuilding[b1],mapIdBuilding[b2]] = attr_edges[(b1,b2)]


fig, ax = plt.subplots()

c = ax.pcolor(freq, edgecolors='k', linewidths=4)
ax.set_title('User handover frequency in all buildings')

ax.set_xticks(np.arange(len(names_building)) + 0.5, minor=False)
ax.set_yticks(np.arange(len(names_building)) + 0.5, minor=False)

ax.set_xticklabels(names_building,rotation=90)
ax.set_yticklabels(names_building)

fig.tight_layout()
plt.show()

2. with user movements among all buildings (excluding self buildings)

In [14]:
attr_building={}
for ap in nodes:
    name  = ap.split("-")[1]
    attr_building[ap] = name
names_building = list(set(attr_building.values()))
#edges by building
eall = [(x.split("-")[1],y.split("-")[1]) for (x,y) in edges if x.split("-")[1]!=y.split("-")[1]]
attr_edges = dict(collections.Counter(eall))
  
source,target,value=[],[],[]
for item in attr_edges.items():
    source.append(item[0][0])
    target.append(item[0][1])
    value.append(item[1])
    
    
edgeBuilding = pd.DataFrame({"source":source,"target":target,"value":value})
nodeBuilding = hv.Dataset(pd.DataFrame({"building":names_building}))

chord = hv.Chord((edgeBuilding, nodeBuilding))
chord.opts(
    opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source'), 
               labels='building', node_color=dim('building').str(),height=600,width=600))
Out[14]:

Probability matrix

In [36]:
names_building = sorted(names_building)
mapIdBuilding = dict(zip(names_building,range(len(names_building))))
freq = np.zeros((len(names_building),len(names_building)))
for b1,b2 in attr_edges:
    freq[mapIdBuilding[b1],mapIdBuilding[b2]] = attr_edges[(b1,b2)]


fig, ax = plt.subplots()

c = ax.pcolor(freq, edgecolors='k', linewidths=4)
ax.set_title('User handover frequency in all buildings (excluding self.buildings)')

ax.set_xticks(np.arange(len(names_building)) + 0.5, minor=False)
ax.set_yticks(np.arange(len(names_building)) + 0.5, minor=False)

ax.set_xticklabels(names_building,rotation=90)
ax.set_yticklabels(names_building)

fig.tight_layout()
plt.show()

Return to webpage home:

Home

 In next cases, we analyse the movements/handovers inside buildings in only one day.

 How to describe movement activity within a building?

In [16]:
#in lmacs, we have the handovers
print(lmacs[:3])
hashedmac
0008C7B255C637F7A986A208064F9FD46ED474A1                                       [AP-MO-DB-W51]
000E406B899AB60390F1BD2574038295DFABF22C                                       [AP-IE-IE-W01]
000EDE8D7FA33EE5D9CE444CDA4A70D1829D0E39    [AP-IE-IE-W06, AP-IE-IE-W06, AP-IE-IE-W06, AP-...
Name: ap_name, dtype: object
In [17]:
from collections import defaultdict

equivBuildings={"MO":"MO","GC":"GC","MA":"MA","RL":"RL","AT":"AT","JO":"JO","ITD":"ITD","RES":"RES","SCT":"SCT","IE":"IE","CEP":"MA","SL":"SL","CJ":"CJ","CTI":"CTI","PF":"AT","CL":"CL"}
buildingsCode = equivBuildings.keys()
def buildingCode(equivBuildings,apname):
    return apname.split("-")[1],apname.split("-")[3]

mBuildings = defaultdict(list)
selfMBuild = defaultdict(int)   
apsInBuild = defaultdict(set)
for aps in lmacs.values:
    for pos in range(len(aps)):
        build,apid = buildingCode(equivBuildings,aps[pos])
        apsInBuild[equivBuildings[build]].add(apid)
        
        if pos+1 < len(aps):
            buildn,apidn = buildingCode(equivBuildings,aps[pos+1])
            if equivBuildings[build]==equivBuildings[buildn] and apid != apidn:
                mBuildings[equivBuildings[build]].append((apid,apidn))
        else:
            if pos == 1 or len(mBuildings[equivBuildings[build]])==0:
                mBuildings[equivBuildings[build]].append((apid,apid))
                selfMBuild[equivBuildings[build]]+=1

movs  = np.array([len(mBuildings[k]) for k in mBuildings])
movsNotSelf  = np.array([len(mBuildings[k])-selfMBuild[k] for k in mBuildings])

fig, ax = plt.subplots()
p0 = plt.bar(np.arange(len(movs)),movs-movsNotSelf)
p1 = plt.bar(np.arange(len(movs)),movsNotSelf,bottom=movs-movsNotSelf)
ax.set_title('Handovers in each building')
ax.set_xticks(np.arange(len(movs)), minor=False)
ax.set_xticklabels(mBuildings.keys(),rotation=90)
plt.legend((p0[0],p1[1]),("Only one AP","More APs"))
fig.tight_layout()
plt.show()

apsInBuildings = [len(apsInBuild[k]) for k in apsInBuild]
fig1, ax = plt.subplots()
ax.bar(np.arange(len(apsInBuildings)),apsInBuildings)
ax.set_xticks(np.arange(len(apsInBuild.keys())), minor=False)
ax.set_xticklabels(apsInBuild.keys(),rotation=90)
ax.set_ylabel('Total number of APs')
plt.show()
#Nice to see the rate between the number of APs in buildings vs the handovers. Building: size, design,...
In [18]:
#Handovers frequency
fig1, ax = plt.subplots()
for b in mBuildings.keys():
    x = np.array(sorted(collections.Counter(mBuildings[b]).values(),reverse=True))
    if len(x)>10:
        ax.plot(x,label=b)
ax.set_title('Repetition of same handovers (APi -> APj)')
ax.set_xlabel('Types of transictions bet. APi -> APj')
ax.set_ylabel('Frequency')
ax.set_ylim(0,30)
ax.set_xlim(0,400)
plt.legend(ncol=4)
plt.show()
In [19]:
#AP frequency 
from collections import Counter

freqAP = {}
bigBuild = 0
fig1, ax = plt.subplots()
for b in mBuildings.keys():
    lsdap = list(zip(*mBuildings[b]))
    x = (Counter(lsdap[0])+Counter(lsdap[1])).values()
    if len(x)>bigBuild: 
        bigBuild=len(x)
    freqAP[b] = np.array(sorted(x,reverse=True))
    ax.plot(freqAP[b],label=b)
ax.set_title('Frequency of APs in handovers')
ax.set_xlabel('APs')
ax.set_ylabel('Frequency')
plt.legend(ncol=4)
plt.show()
In [20]:
# Normalized AP frequency in handovers.
fig1, ax = plt.subplots()
for b in mBuildings.keys():
    normFreqAP = (freqAP[b] - np.min(freqAP[b]))/np.ptp(freqAP[b])
    ax.plot(sorted(normFreqAP,reverse=True),label=b)
ax.set_title('normalized frequency APs  in handovers')
ax.set_xlabel('APs')
ax.set_ylabel('Frequency')
ax.axhline(y=0.4, linestyle='dashed', alpha=0.5,color='#004f6d')
ax.text(x=40, y=0.4+0.05, s="40%", alpha=0.7, color='#004f6d')
plt.legend(ncol=4)
plt.show()
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/ipykernel_launcher.py:4: RuntimeWarning: invalid value encountered in true_divide
  after removing the cwd from sys.path.
In [21]:
dfAps = pd.DataFrame()
for b in mBuildings.keys():
    dfAps[b] = np.concatenate((freqAP[b],([np.nan]*(bigBuild-len(freqAP[b])))))
dfAps = dfAps.T    
fig1, ax = plt.subplots()
dfAps.mean().plot()
ax.set_title('Average frequency of APs in handovers')
ax.set_xlabel('APs')
ax.set_ylabel('Average Frequency')
plt.show()

Part2. Movements in all the dataset by user

In [27]:
df = pd.read_csv("proximity.txt","	")
df = df.rename(columns={"hashed_sta_eth_mac": "hashedmac"})
df = df.sort_values(by=['hashedmac', 'date_time'],ascending=True)

df['date_time'] = pd.to_datetime(df['date_time']) 

#Grouping same hashed macs by time difference as a new users
df['diff'] = df['date_time'].sub(df.groupby('hashedmac')['date_time'].shift()).dt.total_seconds()/60 #Xisco code ;)
threshold = 10 #NOTE: TODO how to measure the effect of this threshold in the results... 
df["id"] = ((df['diff']>threshold) | (df['hashedmac'] != df['hashedmac'].shift())).cumsum()

#Clean the dataset
df.reset_index(inplace=True)
df = df.drop(columns=['date_time',"json_name","apiDateTime"])
df = df.drop(columns=["hashedmac","index","diff"])
df = df.drop(columns=['radio_mac_addr', 'rssi_val', 'target_type', 'ts'])

print(df.tail())
               ap_name      id
1223992    AP-MA-A-W21  579625
1223993    AP-MA-A-W21  579625
1223994    AP-MA-A-W22  579625
1223995   AP-MA-A1-W07  579625
1223996  AP-RES-P3-W23  579626
In [28]:
lmacs = df.groupby("id")["ap_name"].apply(list)
lent = list(map(lambda x:len(x),lmacs))
df = pd.DataFrame(zip(lmacs,lent),columns=["aps","length"])

nmoves = np.sum(df.length>1)
rtmoves = nmoves/df.shape[0]
print("Percentatge of connections with more than one APs: %.3f%%"%(rtmoves*100))
#Number of different hashed macs in total
(unique, counts) = np.unique(df.length, return_counts=True)
assert np.sum(counts[1:])==nmoves

fig, ax = plt.subplots()
ax.bar(unique,counts)
ax.set_title('User handover frequency in all buildings (one week)')
plt.show()
Percentatge of connections with more than one APs: 45.959%
In [29]:
equivAP={"MO":"MO","GC":"GC","MA":"MA","RL":"RL","AT":"AT","JO":"JO","ITD":"ITD","RES":"RES","SCT":"SCT","IE":"IE","CEP":"MA","SL":"SL","CJ":"CJ","CTI":"CTI","PF":"AT","CL":"CL"}

def getNames(x):
    return [equivAP[i.split("-")[1]] for i in x]
df["builds"] = list(map(getNames,df.aps))


#More than one handover with a list of ID buildings.
df2 = df[df.length>1]
df2["diffBuilds"] = list(map(lambda x:len(set(x)),df2.builds))

(unique, counts) = np.unique(df2.diffBuilds, return_counts=True)
fig, ax = plt.subplots()
ax.bar(unique[1:],counts[1:])
ax.set_title('User handover frequency among different buildings (one week)')
ax.set_xlabel('number of buildings')
ax.set_ylabel('users')
plt.show()

nmovediff = np.sum(counts[1:])
rtmoves = nmovediff/df.shape[0]
print("Percentatge of connections with more than one APs among diff. buildings: %.3f%%"%(rtmoves*100))
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/ipykernel_launcher.py:10: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
Percentatge of connections with more than one APs among diff. buildings: 2.839%
In [30]:
edges = []
nodes = set()
for aps in df2[df2.diffBuilds>1].aps:
    aps = list(set(aps))
    for ap1,ap2 in zip(aps,aps[1:]):
        b1 = ap1.split("-")[1]
        b2 = ap2.split("-")[1]
        if b1!=b2:
            edges.append((b1,b2))
            nodes.add(b1)
            nodes.add(b2)
    
nodes = list(nodes) 

attr_edges = dict(collections.Counter(edges))
  
source,target,value=[],[],[]
for item in attr_edges.items():
    source.append(item[0][0])
    target.append(item[0][1])
    value.append(item[1])
In [31]:
edgeBuilding = pd.DataFrame({"source":source,"target":target,"value":value})
edgeBuilding["target"] = edgeBuilding["target"] +"_"
sources_order = edgeBuilding.groupby("source").sum().sort_values("value")[::-1]
targets_order = edgeBuilding.groupby("target").sum().sort_values("value")[::-1]
allnodes = np.concatenate((sources_order.index.values,targets_order.index.values))
nodesSankey = hv.Dataset(pd.DataFrame({"buildings":allnodes}))
value_dim = hv.Dimension('value', unit='dev.')
betBuildings = hv.Sankey((edgeBuilding, nodesSankey),["source","target"],vdims=value_dim)
betBuildings.opts(
    opts.Sankey(labels='buildings', label_position='right', width=900, height=500, cmap='Set1',
                edge_color=dim('source').str(), node_color=dim('buildings').str()))
Out[31]:
In [34]:
edgeBuilding = pd.DataFrame({"source":source,"target":target,"value":value})
nodeBuilding = hv.Dataset(pd.DataFrame({"building":allnodes}))
chord = hv.Chord((edgeBuilding, nodeBuilding))
chord.opts(
    opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source'), 
               labels='building', node_color=dim('building').str(),height=600,width=600))
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('angle', 16), ('text', 32), ('x', 16), ('y', 16)
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('angle', 16), ('text', 32), ('x', 16), ('y', 16)
Out[34]:
In [38]:
nodes = sorted(nodes)
mapIdBuilding = dict(zip(nodes,range(len(nodes))))
freq = np.zeros((len(nodes),len(nodes)))
for b1,b2 in attr_edges:
    freq[mapIdBuilding[b1],mapIdBuilding[b2]] = attr_edges[(b1,b2)]


fig, ax = plt.subplots()

c = ax.pcolor(freq, edgecolors='k', linewidths=4)
ax.set_title('User handover frequency in all buildings (excluding self.buildings)')

ax.set_xticks(np.arange(len(nodes)) + 0.5, minor=False)
ax.set_yticks(np.arange(len(nodes)) + 0.5, minor=False)

ax.set_xticklabels(nodes,rotation=90)
ax.set_yticklabels(nodes)

fig.tight_layout()
plt.show()

Return to webpage home:

Home

From the Time series perspective

In [1]:
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.seasonal import STL
In [4]:
df = pd.read_csv("proximity.txt","	")
df = df.rename(columns={"hashed_sta_eth_mac": "hashedmac"})
df = df.sort_values(by=['hashedmac', 'date_time'],ascending=True)

df['date_time'] = pd.to_datetime(df['date_time']) # convertimos a datetime
df.index = df.date_time
df["one"] = np.ones(df.shape[0])
In [5]:
days = set(df.index.day)
nplots = len(set(days))
cols = 7
rows = nplots//cols+1

fig, axs = plt.subplots(rows,cols,figsize=(25,15))
row = 0
for p,d in enumerate(days):
    day = pd.to_datetime("10-%i-2020"%d)
    cold = day.weekday()
    if cold == 0: row +=1 #take care of the first element
    dm =  df.loc["10-%i-2020"%d].one.resample('5min').sum()   
    dm.plot(ax=axs[row,cold])    
    axs[row,cold].title.set_text("10-%i-2020"%d)
    axs[row,cold].set_ylim(0,5000)
fig.suptitle("Freq. of handovers in AP in 5minutes period", fontsize=20)   
Out[5]:
Text(0.5, 0.98, 'Freq. of handovers in AP in 5minutes period')
In [16]:
ds = df.loc["10-05-2020":"10-09-2020"].one.resample('5min').sum()   
ds = ds.asfreq('5T')

res = STL(ds, robust = True,period=5).fit() 
res.plot()
plt.show()
In [6]:
# =============================================================================
# #The AP with more handovers
# =============================================================================
# g = df.groupby(["ap_name"]).agg({"one":np.sum})
# print(g[g.one==g.one.max()])

ap = "AP-IE-IE-W06"
dfa = df[df.ap_name==ap]
fig, axs = plt.subplots(rows,cols,figsize=(25,15))
row = 0
for p,d in enumerate(days):
    day = pd.to_datetime("10-%i-2020"%d)
    cold = day.weekday()
    if cold == 0: row +=1 #take care of the first element
    dm =  dfa.loc["10-%i-2020"%d].one.resample('5min').sum()   
    dm.plot(ax=axs[row,cold])    
    axs[row,cold].title.set_text("10-%i-2020"%d)
    axs[row,cold].set_ylim(0,70)
fig.suptitle("Handovers in AP %s"%ap, fontsize=30)   
# A lot of noise in the AP on weekends 


# sunday: more freq. is  AP-IE-IE-W06  2292.0 !!
g = df.loc["10-04-2020"].groupby(["ap_name"]).agg({"one":np.sum})
print(g[g.one==g.one.max()])
                 one
ap_name             
AP-IE-IE-W06  2292.0
In [7]:
# =============================================================================
# Use of APs on weedays and weekends (only one sunday)
# =============================================================================
g = df.groupby(["ap_name"]).agg({"one":np.sum})
g = g.sort_index()
g.one.plot()
plt.title("AP freq. handover", fontsize=20)   

gw = df.loc["10-04-2020"].groupby(["ap_name"]).agg({"one":np.sum})
gw = gw.sort_index()

dg = pd.concat([g,gw],axis=1)
dg.columns =["all","sunday"]
dg.sunday = dg.sunday*10 #MAGIC NUMBER!
dg.plot()
plt.title("AP freq. handover and APs freq on sunday *10", fontsize=20)   
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/ipykernel_launcher.py:12: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  if sys.path[0] == '':
Out[7]:
Text(0.5, 1.0, 'AP freq. handover and APs freq on sunday *10')
In [9]:
# =============================================================================
# AP without handovers on weekend (note: only using one sunday)
# =============================================================================
dg2 = dg[dg.sunday.isnull()]
print(dg2[dg2["all"]==dg2["all"].max()])
ap = "AP-RL-PPAL-W30"
dfa = df[df.ap_name==ap]
fig, axs = plt.subplots(rows,cols,figsize=(25,15))
row = 0
for p,d in enumerate(days):
    day = pd.to_datetime("10-%i-2020"%d)
    cold = day.weekday()
    if cold == 0: row +=1 #take care of the first element
    dm =  dfa.loc["10-%i-2020"%d].one.resample('5min').sum() 
    if len(dm)>0:
        dm.plot(ax=axs[row,cold])    
        axs[row,cold].title.set_text("10-%i-2020"%d)
        axs[row,cold].set_ylim(0,70)
fig.suptitle("Handovers in AP %s"%ap, fontsize=30)   
                   all  sunday
AP-RL-PPAL-W30  6949.0     NaN
Out[9]:
Text(0.5, 0.98, 'Handovers in AP AP-RL-PPAL-W30')
In [ ]: