Skip to content

Commit

Permalink
adds seed scripts which generate more realistic data
Browse files Browse the repository at this point in the history
  • Loading branch information
Snorre98 committed Oct 24, 2024
1 parent 33091d7 commit 0487963
Show file tree
Hide file tree
Showing 6 changed files with 749 additions and 145 deletions.
2 changes: 1 addition & 1 deletion backend/root/management/commands/seed_scripts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@
('recruitment_interviewavailability', recruitment_interviewavailability.seed),
('recruitment_separate_position', recruitment_separate_position.seed),
('recruitment_applications', recruitment_applications.seed),
('recruitment_occupied_time', recruitment_occupied_time.seed),
('recruitment_position_interviewers', recruitment_position_interviewers.seed),
('recruitment_occupied_time', recruitment_occupied_time.seed),
# Example seed (not run unless targeted specifically)
('example', example.seed),
]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,42 +1,177 @@
from __future__ import annotations

from random import randint
from random import choice, sample, randint

from samfundet.models.general import User
from django.db import transaction
from django.contrib.auth.hashers import make_password

from samfundet.models.general import User, Campus
from samfundet.models.recruitment import RecruitmentPosition, RecruitmentApplication

# Some example data to use for the new RecruitmentApplication instances
# Example data for applications
APPLICATION_DATA = {
'application_text': 'This is the application text',
'application_text': 'This is the application text written by the applicant.',
'applicant_priority': 0,
'recruiter_priority': 0,
'recruiter_status': 0,
}


class ApplicantGenerator:
FIRST_NAMES = ['Ole', 'Lars', 'Anders', 'Magnus', 'Erik', 'Jonas', 'Henrik', 'Emma', 'Nora', 'Ida', 'Sara', 'Maria', 'Anna', 'Ingrid']
LAST_NAMES = ['Hansen', 'Johansen', 'Olsen', 'Larsen', 'Andersen', 'Pedersen', 'Nilsen', 'Kristiansen', 'Jensen', 'Karlsen']
EMAIL_DOMAINS = ['soker.no', 'applicant.com', 'applying.no']

def __init__(self):
self.used_usernames = set(User.objects.filter(username__startswith='APLCT-').values_list('username', flat=True))
self.used_emails = set(User.objects.filter(username__startswith='APLCT-').values_list('email', flat=True))
# Pre-hash the default password
self.hashed_password = make_password('Django123')

def _normalize_string(self, text: str) -> str:
chars_map = {'æ': 'ae', 'ø': 'o', 'å': 'a', 'Æ': 'ae', 'Ø': 'o', 'Å': 'a'}
for old, new in chars_map.items():
text = text.replace(old, new)
return text.lower()

def generate_user_data(self, firstname: str, lastname: str) -> tuple[str, str]:
firstname_norm = self._normalize_string(firstname)
lastname_norm = self._normalize_string(lastname)

base_username = f'APLCT-{firstname_norm[:3]}{lastname_norm[:3]}'
username = base_username
counter = 1

while username in self.used_usernames:
username = f'{base_username}{counter}'
counter += 1

email = f'{username}@{choice(self.EMAIL_DOMAINS)}'
while email in self.used_emails:
email = f'{username}{counter}@{choice(self.EMAIL_DOMAINS)}'
counter += 1

self.used_usernames.add(username)
self.used_emails.add(email)
return username, email

def generate_users_batch(self, count: int, campus_list: list[Campus]) -> list[User]:
users = []
for _ in range(count):
firstname = choice(self.FIRST_NAMES)
lastname = choice(self.LAST_NAMES)
username, email = self.generate_user_data(firstname, lastname)

user = User(
username=username,
email=email,
password=self.hashed_password, # Use pre-hashed password
first_name=f'APLCT-{firstname}',
last_name=lastname,
is_superuser=False,
campus=choice(campus_list),
)
users.append(user)
return users


def distribute_applications(
user_ids: list[int],
positions: list[RecruitmentPosition],
) -> list[dict]:
"""
Distribute applications among users ensuring each user applies to multiple positions.
Returns a list of application data dictionaries.
"""
applications = []
positions_by_recruitment = {}

# Group positions by recruitment
for position in positions:
if position.recruitment_id not in positions_by_recruitment:
positions_by_recruitment[position.recruitment_id] = []
positions_by_recruitment[position.recruitment_id].append(position)

# For each user, select 2-5 positions from each recruitment period they're interested in
for user_id in user_ids:
# Randomly select 1-3 recruitment periods for this user
num_recruitments = randint(1, min(3, len(positions_by_recruitment)))
selected_recruitments = sample(list(positions_by_recruitment.keys()), num_recruitments)

for recruitment_id in selected_recruitments:
recruitment_positions = positions_by_recruitment[recruitment_id]
# Apply for 2-5 positions in this recruitment period
num_positions = randint(2, min(5, len(recruitment_positions)))
selected_positions = sample(recruitment_positions, num_positions)

# Create applications for selected positions
for position in selected_positions:
application_data = APPLICATION_DATA.copy()
application_data.update(
{
'recruitment_position_id': position.id,
'recruitment_id': recruitment_id,
'user_id': user_id,
'applicant_priority': randint(1, num_positions), # Set a random priority
}
)
applications.append(application_data)

return applications


def seed():
yield 0, 'recruitment_applications'

# Clean up existing data
RecruitmentApplication.objects.all().delete()
yield 0, 'Deleted old applications'

positions = RecruitmentPosition.objects.all()
users = User.objects.all()
created_count = 0

for position_index, position in enumerate(positions):
for _ in range(randint(0, 5)): # Create between 0 and 5 instances for each position
application_data = APPLICATION_DATA.copy()
application_data.update(
{
'recruitment_position': position,
'recruitment': position.recruitment,
'user': users[randint(0, len(users) - 1)], # random user from all users
}
)
_application, created = RecruitmentApplication.objects.get_or_create(**application_data)
yield 10, 'Deleted old applications'

User.objects.filter(username__startswith='APLCT-').delete()
yield 20, 'Deleted old applicant users'

# Create new applicant users
applicant_generator = ApplicantGenerator()
campus_list = list(Campus.objects.all())
total_applicants = 1200
batch_size = 100

# Create users in batches and save them
with transaction.atomic():
for i in range(0, total_applicants, batch_size):
current_batch = min(batch_size, total_applicants - i)
user_batch = applicant_generator.generate_users_batch(current_batch, campus_list)
User.objects.bulk_create(user_batch)
yield 20 + (i + current_batch) / total_applicants * 30, f'Created {i + current_batch} applicant users'

# Get all positions and applicant user IDs
positions = list(RecruitmentPosition.objects.all().select_related('recruitment'))
applicant_user_ids = list(User.objects.filter(username__startswith='APLCT-').values_list('id', flat=True))

# Create applications data
all_applications = distribute_applications(applicant_user_ids, positions)
applications_created = 0
batch_size = 100

# Create applications in batches
for i in range(0, len(all_applications), batch_size):
batch = all_applications[i : i + batch_size]
RecruitmentApplication.objects.bulk_create([RecruitmentApplication(**data) for data in batch])
applications_created += len(batch)

if i % 1000 == 0: # Report progress every 1000 applications
progress = 50 + (i / len(all_applications) * 50)
yield min(progress, 100), f'Created {applications_created} applications'

if created:
created_count += 1
yield (position_index + 1) / len(positions), 'recruitment_applications'
# Calculate some statistics for the final message
total_users = len(applicant_user_ids)
avg_applications = applications_created / total_users

yield 100, f'Created {created_count} recruitment_applications'
yield (
100,
(
f'Created {applications_created} recruitment applications '
f'for {total_users} applicant users '
f'(average {avg_applications:.1f} applications per user)'
),
)
Loading

0 comments on commit 0487963

Please sign in to comment.