Hi everyone,
I modified the DQN algorithm in this repository into a multi-agent DQN approach for a wireless network environment. I wrote this code based on another GitHub repository; the original code works well, but when I change the environment, the following error occurs:

```
Traceback (most recent call last):
  File "D:/main -DQN.py", line 452, in <module>
    main()
  File "D:/main -DQN.py", line 432, in main
    algo = DQN( args)  # n_clusters is the action dimension in DQN
  File "D:/main -DQN.py", line 158, in __init__
    self.agent = Agent( args, self.tau)
  File "D:/main -DQN.py", line 246, in __init__
    self.model = self.network()
  File "D:/main -DQN.py", line 254, in network
    inp = Input((self.state_dim))
  File "C:\Users\AppData\Roaming\Python\Python37\site-packages\keras\engine\topology.py", line 1451, in Input
    batch_shape = (None,) + tuple(shape)
TypeError: 'int' object is not iterable
```
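If it helps to narrow things down, the same `TypeError` appears whenever `Input` receives a plain integer instead of a tuple, and in my `Agent` class `self.state_dim = self.args.A` is an integer. Below is only a minimal, self-contained sketch of that difference, using a hypothetical `state_dim = 10`; I am not sure whether this is the only issue in my code:

```python
from keras.layers import Input, Dense
from keras.models import Model

state_dim = 10  # hypothetical value standing in for self.args.A (an int)

# Input(state_dim) raises "TypeError: 'int' object is not iterable",
# because Keras calls tuple(shape) internally.
# Passing a one-element tuple builds the layer as expected:
inp = Input(shape=(state_dim,))
out = Dense(4, activation='linear')(inp)
model = Model(inp, out)
model.summary()
```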
The complete code is as follows:
```python
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf
import pandas as pd
import numpy as np
import sys
import os
import copy, json, argparse
from numpy import pi
from random import random, uniform, choices, randint, sample, randrange
import random
import math
from tqdm import tqdm
import keras.backend as K
from keras.optimizers import Adam
from keras.models import Model
from keras.layers import Dense, Flatten, Input
from collections import deque
class Environ:
    def __init__(self, args):
        self.args = args
        self.state_dim = (self.args.A, )
        self.action_dim = args.C
        self.bs = complex((500 / 2), (500 / 2))
        self.S = (np.zeros(self.args.A)).reshape(-1)

    def Location(self):
        rx = uniform(0, 500)
        ry = uniform(0, 500)
        Loc = complex(rx, ry)
        return Loc

    def PathGain(self, Loc):
        d = abs(Loc - self.bs)
        d = d ** (-3)
        u = np.random.rand(1, 1)
        sigma = 1
        x = sigma * np.sqrt(-2 * np.log(u))
        h = d * x
        return h

    def reset(self):  # Reset the states
        s = np.zeros(self.args.A)
        return s.reshape(-1)

    def RecievePower(self, UsersLoc):
        H = self.PathGain(UsersLoc)
        UsersRecievePower = self.args.P * H
        return UsersRecievePower

    def TotalRate(self, actionRB_i, actionRB):
        interference = self.args.Noise
        Loc_i = self.Location()
        for j in range(self.args.A):
            if actionRB_i == actionRB[j]:
                Loc_j = self.Location()
                RecievePower_j = self.RecievePower(Loc_j)
                interference = interference + RecievePower_j
            else:
                interference = interference
        RecievePower_i = self.RecievePower(Loc_i)
        SINR = interference / (interference - RecievePower_i)
        Rate = self.args.BW * (np.log2(SINR))
        return Rate

    def computeQoS(self, actionRB, actionRB_i):
        TotalRate = self.TotalRate(actionRB, actionRB_i)
        if TotalRate >= self.args.Rmin:
            QoS = 1.0
        else:
            QoS = 0.0
        return QoS

    def ComputeState(self, actionRB):
        QoS = np.zeros(self.args.A)
        for i in range(self.args.A):
            actionRB_i = actionRB[i]
            QoS[i] = self.computeQoS(actionRB, actionRB_i)
        S = np.zeros(self.args.A)
        for i in range(self.args.A):
            S[i] = QoS[i]
        self.S = S
        return self.S.reshape(-1)

    def Reward(self, actionRB, actionRB_i):
        Rate = np.zeros(self.args.A)
        Satisfied_Users = 0
        for i in range(self.args.A):
            Rate[i] = self.TotalRate(actionRB, actionRB_i)
            Satisfied_Users = Satisfied_Users + self.computeQoS(actionRB)
        TotalRate = 0.0
        TotalPower = self.args.circuitPower
        for i in range(self.args.A):
            TotalRate = TotalRate + Rate[i]
            TotalPower = TotalPower + self.args.P
        if Satisfied_Users == self.args.A:
            reward = TotalRate / TotalPower
        else:
            reward = self.args.negative_cost
        return reward

    def step(self, actionRB):
        next_s = self.ComputeState(actionRB)
        r = self.Reward(actionRB)
        done = False
        info = None
        return next_s, r, done, info
class Environment(object):
    def __init__(self, gym_env, action_repeat):
        self.env = gym_env
        self.timespan = action_repeat
        self.gym_actions = 2  # range(gym_env.action_space.n)
        self.state_buffer = deque()

    def get_action_size(self):
        return self.env.action_dim

    def get_state_size(self):
        return self.env.state_dim

    def reset(self):
        # Clear the state buffer
        self.state_buffer = deque()
        x_t = self.env.reset()
        s_t = np.stack([x_t for i in range(self.timespan)], axis=0)
        for i in range(self.timespan - 1):
            self.state_buffer.append(x_t)
        return s_t

    def step(self, action):
        x_t1, r_t, terminal, info = self.env.step(action)
        previous_states = np.array(self.state_buffer)
        s_t1 = np.empty((self.timespan, *self.env.state_dim))
        s_t1[:self.timespan - 1, :] = previous_states
        s_t1[self.timespan - 1] = x_t1
        # Pop the oldest frame, add the current frame to the queue
        self.state_buffer.popleft()
        self.state_buffer.append(x_t1)
        return s_t1, r_t, terminal, info

    def render(self):
        return self.env.render()
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)
    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter("/tensorboard_" + args.env)
    # Initialize the wireless environment
    users_env = Environ(args)
    # print(users_env)
    # Wrap the environment to use consecutive frames
    env = Environment(users_env, args.consecutive_frames)
    env.reset()
    # Define parameters for the DDQN and DDPG algorithms
    state_dim = env.get_state_size()
    action_dim = users_env.action_dim
    # The maximum and minimum values for precoding vectors
    # act_range = 1
    # act_min = 0
    # Initialize the DQN algorithm for the clustering optimization
    algo = DQN( args)  # n_clusters is the action dimension in DQN
    # if args.step == "train":
    # Train
    stats = algo.train(env, args, summary_writer)
    # Export results to CSV
    if (args.gather_stats):
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.out_dir + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')
        # df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')
    # Save weights and close environments
    exp_dir = '{}/models_A_{}_C_{}_Rmin_{}/'.format(args.out_dir, args.A, args.C, args.Rmin)
    # exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)
    # Save DDQN
    export_path = '{}_{}_NB_EP_{}_BS_{}'.format(exp_dir, "DQN", args.nepisodes, args.batch_size)
    algo.save_weights(export_path)
class DQN:
    def __init__(self, args):
        # Environment and DQN parameters
        self.args = args
        self.action_dim = self.args.C
        self.state_dim = self.args.A
        self.buffer_size = self.args.capacity
        # Memory Buffer for Experience Replay
        self.buffer = MemoryBuffer(self.buffer_size)
        self.epsilon = self.args.eps
        self.tau = 1.0
        self.agent = Agent( args, self.tau)

class Agent:
    def __init__(self, args, tau):
        self.args = args
        self.state_dim = self.args.A
        self.action_dim = self.args.C
        self.tau = tau
        self.lr = self.args.learningrate
        # Initialize Deep Q-Network
        self.model = self.network()
        self.model.compile(Adam(self.lr), 'mse')
        # Build target Q-Network
        self.target_model = self.network()
        self.target_model.compile(Adam(self.lr), 'mse')
        self.target_model.set_weights(self.model.get_weights())

class MemoryBuffer(object):
    def __init__(self, buffer_size):
        # Standard Buffer
        self.buffer = deque()
        self.count = 0
        self.buffer_size = buffer_size
def get_session():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    return tf.Session(config=config)

def tfSummary(tag, val):
    return tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=val)])

def gather_stats(agent, env):
    score = []
    for k in range(10):
        old_state = env.reset()
        cumul_r, done = 0, False
        while not done:
            a = agent.policy_action(old_state)
            old_state, r, done, _ = env.step(a)
            cumul_r += r
        score.append(cumul_r)
    return np.mean(np.array(score)), np.std(np.array(score))

def conv_block(inp, d=3, pool_size=(2, 2), k=3):
    conv = conv_layer(d, k)(inp)
    return MaxPooling2D(pool_size=pool_size)(conv)

os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
def parse_args(args):
    parser = argparse.ArgumentParser(description='Training parameters')
    #
    parser.add_argument('--out_dir', type=str, default='experiments', help="Name of the output directory")
    parser.add_argument('--consecutive_frames', type=int, default=2, help="Number of consecutive frames (action repeat)")
    parser.add_argument('--gather_stats', dest='gather_stats', action='store_true', help="Compute Average reward per episode (slower)")
    parser.add_argument('--A', type=int, default=10, help="The number of agents")
    parser.add_argument('--C', type=int, default=30, help="The number of Resources")
    parser.add_argument('--Noise', type=float, default=0.00000000000001, help="The background noise")
    parser.add_argument('--BW', type=int, default=180000, help="The bandwidth")
    parser.add_argument('--Rmin', type=int, default=1000000, help="Agents' QoS")
    parser.add_argument('--P', type=float, default=0.01, help="Agents' transmit power")
    parser.add_argument('--circuitPower', type=float, default=0.05, help="The circuit Power")
    parser.add_argument('--negative_cost', type=float, default=-1.0, help="The negative cost")
    parser.add_argument('--capacity', type=int, default=500, help="Capacity of Replay Buffer")
    parser.add_argument('--learningrate', type=float, default=0.01, help="The learning rate")
    parser.add_argument('--eps', type=float, default=0.8, help="The epsilon")
    parser.add_argument('--eps_decay', type=float, default=0.99, help="The epsilon decay")
    parser.add_argument('--eps_increment', type=float, default=0.003, help="The epsilon increment")
    parser.add_argument('--batch_size', type=int, default=8, help="The batch size")
    parser.add_argument('--gamma', type=float, default=0.99, help="The discount factor")
    parser.add_argument('--nepisodes', type=int, default=500, help="The number of episodes")
    parser.add_argument('--nsteps', type=int, default=500, help="The number of steps")
    parser.add_argument('--env', type=str, default='Environ', help="Wireless environment")
    parser.add_argument('--gpu', type=str, default="", help='GPU ID')
    # main() expects the parsed namespace back
    return parser.parse_args(args)
if __name__ == "__main__":
    main()
```
Thanks in advance for your help.