import numpy as np
import pandas as pd
import networkx as nx
import collections
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score, train_test_split, RepeatedKFold, GridSearchCV, ParameterGrid
import multiprocessing
import warnings
import sklearn
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import make_classification
print(sklearn.__version__)
import time
1.0.1
# Synthetic infrastructure inventory: one row per node, reproducible via a fixed seed.
np.random.seed(2022)
train_size = 10000
data = {}
data["HWAvailability"] = np.random.choice(range(1, 20), size=train_size)
data["EdgeNode"] = np.random.choice([0, 1], size=train_size, p=[0.8, 0.2])
# Mask that is 1 for regular nodes and 0 for edge nodes.
not_edge = 1 - data["EdgeNode"]
# Edge nodes get latency 0; the other nodes draw an integer from [10, 100).
data["Latency"] = not_edge * np.random.randint(10, 100, size=train_size)
data["Cost"] = np.abs(np.random.normal(loc=10.0, scale=20.0, size=train_size)).round(2)
# Same masking for the degree: 0 on edge nodes, an integer from [0, 4) elsewhere.
data["Degree"] = not_edge * np.random.randint(0, 4, size=train_size)
df = pd.DataFrame(data)
df.head(10)
| | HWAvailability | EdgeNode | Latency | Cost | Degree |
---|---|---|---|---|---|
0 | 14 | 0 | 45 | 13.73 | 0 |
1 | 17 | 0 | 90 | 10.52 | 2 |
2 | 18 | 0 | 12 | 19.71 | 2 |
3 | 18 | 0 | 16 | 1.55 | 2 |
4 | 17 | 0 | 15 | 20.04 | 3 |
5 | 19 | 0 | 51 | 9.86 | 3 |
6 | 17 | 0 | 10 | 7.49 | 2 |
7 | 10 | 1 | 0 | 29.65 | 0 |
8 | 2 | 0 | 32 | 24.61 | 0 |
9 | 12 | 0 | 36 | 8.82 | 0 |
# Dummy data - allocation service
def myNaiveCriteria(row):
    """Return 1 if the node described by *row* passes the naive allocation rules, else 0.

    A node passes as soon as one of these rules holds (checked in this order):

    * ``EdgeNode == 0`` and ``Cost <= 10``
    * ``EdgeNode == 1`` and ``Cost <= 30``
    * ``HWAvailability <= 5`` and ``Latency <= 30``

    Args:
        row: Object exposing ``EdgeNode``, ``Cost``, ``HWAvailability`` and
            ``Latency`` as attributes (for example a DataFrame row passed in
            by ``DataFrame.apply(..., axis=1)``).

    Returns:
        int: 1 when any rule matches, 0 otherwise.
    """
    # Non-edge nodes are only accepted when they are cheap.
    if row.EdgeNode == 0 and row.Cost <= 10:
        return 1
    # Edge nodes tolerate a higher cost ceiling.
    if row.EdgeNode == 1 and row.Cost <= 30:
        return 1
    # Fallback rule, independent of node type and cost.
    if row.HWAvailability <= 5 and row.Latency <= 30:
        return 1
    return 0
# Label every node by running the naive rule row by row, then report how many passed.
servicex = df.apply(myNaiveCriteria, axis="columns")
allocated, total = servicex.sum(), len(servicex)
print(allocated, " of ", total)
4816 of 10000
# Notebook display: the first ten allocation labels.
servicex.head(10)
0    0
1    0
2    0
3    1
4    0
5    1
6    1
7    1
8    0
9    1
dtype: int64
# Attach the labels to the frame, then split features and target into train and
# test parts; random_state pins the shuffle so the split is repeatable.
df["alloc"] = servicex
features = df.drop(columns="alloc")
labels = df["alloc"]
X_train, X_test, y_train, y_test = train_test_split(features, labels, random_state=0)
print(len(X_train))
print(len(X_test))
7500 2500
# Train a 100-estimator AdaBoost classifier and print how long the fit took (seconds).
start = time.perf_counter()
model = AdaBoostClassifier(random_state=0, n_estimators=100)
model.fit(X_train, y_train)
elapsed = time.perf_counter() - start
print(elapsed)
0.3914997990000302
# Second synthetic data set: 100 fresh nodes drawn with a different seed.
np.random.seed(2050)
train_size = 100
data = {}
data["HWAvailability"] = np.random.choice(range(1, 20), size=train_size)
data["EdgeNode"] = np.random.choice([0, 1], size=train_size, p=[0.8, 0.2])
# Mask that is 1 for regular nodes and 0 for edge nodes.
not_edge = 1 - data["EdgeNode"]
data["Latency"] = not_edge * np.random.randint(10, 100, size=train_size)
data["Cost"] = np.abs(np.random.normal(loc=10.0, scale=20.0, size=train_size)).round(2)
data["Degree"] = not_edge * np.random.randint(0, 4, size=train_size)
# The timed section covers both building the frame and predicting on it.
start = time.perf_counter()
dff = pd.DataFrame(data)
forecasting = model.predict(dff)
elapsed = time.perf_counter() - start
print(elapsed)
0.02566551200015965
# Notebook display: the labels predicted for the 100 nodes generated above.
forecasting
array([1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1])
### Scaling
# Scaling benchmark: measure how long the trained model needs to label data
# sets of growing size. node_test collects the sizes, time_test the matching
# durations in seconds (frame construction + predict).
node_test, time_test = [], []
for n in range(1, 50000, 500):
    # Seed with the size itself so every data set is reproducible.
    np.random.seed(n)
    train_size = n
    print("total nodes:", n)
    node_test.append(n)

    # Same synthetic node generator as for the training data.
    data = {}
    data["HWAvailability"] = np.random.choice(range(1, 20), train_size)
    data["EdgeNode"] = np.random.choice([0, 1], train_size, p=[0.8, 0.2])
    data["Latency"] = (data["EdgeNode"] - 1) * -np.random.randint(10, 100, train_size)
    data["Cost"] = np.abs(np.random.normal(10.0, 20.0, size=train_size)).round(2)
    data["Degree"] = (data["EdgeNode"] - 1) * -np.random.randint(0, 4, train_size)

    # Only the frame construction and the prediction are timed.
    start = time.perf_counter()
    dff = pd.DataFrame(data)
    forecasting = model.predict(X=dff)
    time_test.append(time.perf_counter() - start)
total nodes: 1 total nodes: 501 total nodes: 1001 total nodes: 1501 total nodes: 2001 total nodes: 2501 total nodes: 3001 total nodes: 3501 total nodes: 4001 total nodes: 4501 total nodes: 5001 total nodes: 5501 total nodes: 6001 total nodes: 6501 total nodes: 7001 total nodes: 7501 total nodes: 8001 total nodes: 8501 total nodes: 9001 total nodes: 9501 total nodes: 10001 total nodes: 10501 total nodes: 11001 total nodes: 11501 total nodes: 12001 total nodes: 12501 total nodes: 13001 total nodes: 13501 total nodes: 14001 total nodes: 14501 total nodes: 15001 total nodes: 15501 total nodes: 16001 total nodes: 16501 total nodes: 17001 total nodes: 17501 total nodes: 18001 total nodes: 18501 total nodes: 19001 total nodes: 19501 total nodes: 20001 total nodes: 20501 total nodes: 21001 total nodes: 21501 total nodes: 22001 total nodes: 22501 total nodes: 23001 total nodes: 23501 total nodes: 24001 total nodes: 24501 total nodes: 25001 total nodes: 25501 total nodes: 26001 total nodes: 26501 total nodes: 27001 total nodes: 27501 total nodes: 28001 total nodes: 28501 total nodes: 29001 total nodes: 29501 total nodes: 30001 total nodes: 30501 total nodes: 31001 total nodes: 31501 total nodes: 32001 total nodes: 32501 total nodes: 33001 total nodes: 33501 total nodes: 34001 total nodes: 34501 total nodes: 35001 total nodes: 35501 total nodes: 36001 total nodes: 36501 total nodes: 37001 total nodes: 37501 total nodes: 38001 total nodes: 38501 total nodes: 39001 total nodes: 39501 total nodes: 40001 total nodes: 40501 total nodes: 41001 total nodes: 41501 total nodes: 42001 total nodes: 42501 total nodes: 43001 total nodes: 43501 total nodes: 44001 total nodes: 44501 total nodes: 45001 total nodes: 45501 total nodes: 46001 total nodes: 46501 total nodes: 47001 total nodes: 47501 total nodes: 48001 total nodes: 48501 total nodes: 49001 total nodes: 49501
# Plot prediction time against the number of nodes.
fig, ax = plt.subplots()
ax.plot(node_test, time_test)
[<matplotlib.lines.Line2D at 0x129ee29a0>]