0, 'processed' => 0, 'success' => 0, 'error' => 0, 'total' => 0, 'status' => 'Started', 'completed' => false, 'timestamp' => time() ];
file_put_contents($progress_file, json_encode($progress_data));
log_message("Progress file initialized");

// Reset GPT rate limits at the start - this is important
GptHelper::resetRateLimit();

/**
 * Overwrite the progress file with a JSON snapshot of the current run state.
 *
 * @param string    $progress_file Path of the JSON file to write.
 * @param int|float $progress      Completion percentage (callers pass round(...) results).
 * @param int       $processed     Number of members handled so far.
 * @param int       $success       Number of members with at least one saved response.
 * @param int       $error         Number of members that failed.
 * @param int       $total         Total number of members in the selection.
 * @param string    $status        Human-readable status message.
 * @param bool      $completed     True once the run has finished (successfully or not).
 */
function update_progress($progress_file, $progress, $processed, $success, $error, $total, $status, $completed = false) {
    $progress_data = [
        'progress' => $progress,
        'processed' => $processed,
        'success' => $success,
        'error' => $error,
        'total' => $total,
        'status' => $status,
        'completed' => $completed,
        'timestamp' => time()
    ];
    file_put_contents($progress_file, json_encode($progress_data));
}

/**
 * Produce a plausible default answer for a question when no usable GPT answer exists.
 *
 * Choice questions try to match the respondent's age/gender attribute against the
 * option labels and otherwise fall back to the middle option; rating scales return
 * the midpoint; free-text questions return a canned sentence picked by keyword.
 *
 * @param array $question     Question row with 'question_text', 'question_type',
 *                            'options' (array) and 'config' (array).
 * @param array $profile_data Map of attribute name => value for the respondent.
 * @return string The default answer text.
 */
function generateDefaultResponse($question, $profile_data) {
    $question_text = strtolower($question['question_text']);

    // Re-index so positional lookups ([0], middle) work even when the options
    // were decoded from a JSON object or a sparse array.
    $options = (!empty($question['options']) && is_array($question['options']))
        ? array_values($question['options'])
        : [];

    switch ($question['question_type']) {
        case 'single_choice':
        case 'dropdown':
            if (empty($options)) {
                return "Selected option";
            }

            // Age questions: match "age"/"ages" as a standalone word so that
            // "language", "page", "manage" etc. do not trigger age matching.
            if (preg_match('/(?<![a-z])ages?(?![a-z])/', $question_text)) {
                foreach ($profile_data as $attr => $value) {
                    if (stripos((string)$attr, 'age') === false) {
                        continue;
                    }
                    $needle = trim((string)$value);
                    if ($needle === '') {
                        // An empty needle matches every option on PHP 8 (and warns on PHP 7).
                        continue;
                    }
                    $prefix = substr($needle, 0, 2);
                    foreach ($options as $option) {
                        if (stripos($option, $needle) !== false || stripos($option, $prefix) !== false) {
                            return $option;
                        }
                    }
                }
            }

            // Gender questions: pick the option containing the profile's gender value.
            if (strpos($question_text, 'gender') !== false) {
                foreach ($profile_data as $attr => $value) {
                    if (stripos((string)$attr, 'gender') === false) {
                        continue;
                    }
                    $needle = trim((string)$value);
                    if ($needle === '') {
                        continue;
                    }
                    foreach ($options as $option) {
                        if (stripos($option, $needle) !== false) {
                            return $option;
                        }
                    }
                }
            }

            // Default to the middle option (always defined after re-indexing).
            return $options[intval(count($options) / 2)];

        case 'multiple_choice':
            if (!empty($options)) {
                return $options[0]; // Just pick first option
            }
            return "Selected options";

        case 'rating_scale':
            $config = (isset($question['config']) && is_array($question['config'])) ? $question['config'] : [];
            $start = (isset($config['start']) && is_numeric($config['start'])) ? $config['start'] : 1;
            $end = (isset($config['end']) && is_numeric($config['end'])) ? $config['end'] : 5;
            $mid = intval(($start + $end) / 2);
            return (string)$mid;

        case 'text':
        case 'textarea':
        case 'long_text':
            // Generate contextual text based on question content
            if (strpos($question_text, 'experience') !== false) {
                return "My experience has been generally positive with some challenges.";
            } elseif (strpos($question_text, 'opinion') !== false || strpos($question_text, 'think') !== false) {
                return "I believe this is an important topic that requires careful consideration.";
            } elseif (strpos($question_text, 'recommend') !== false) {
                return "I would recommend this to others based on my experience.";
            } elseif (strpos($question_text, 'improve') !== false) {
                return "There are several areas that could be improved for better results.";
            }
            return "This is based on my personal experience and demographic background.";

        default:
            return "Response based on demographic profile";
    }
}

/**
 * Build the user prompt: respondent profile, numbered questions with answer-format
 * hints, and the required reply format.
 *
 * @param array $profile_data Map of attribute name => value.
 * @param array $questions    List of question rows (0-based).
 * @return string The prompt text.
 */
function buildRespondentPrompt($profile_data, $questions) {
    $prompt = "You are a survey respondent with this demographic profile:\n";
    foreach ($profile_data as $attr => $value) {
        $prompt .= "• $attr: $value\n";
    }
    $prompt .= "\nRespond to each question below as this person would. Give ONLY your direct answer for each question - no explanations or extra text.\n\n";

    foreach ($questions as $i => $question) {
        $q_num = $i + 1;
        $prompt .= "$q_num. " . $question['question_text'] . "\n";

        // Add answer format hints
        switch ($question['question_type']) {
            case 'single_choice':
            case 'dropdown':
                if (!empty($question['options'])) {
                    $prompt .= " Choose from: " . implode(" | ", $question['options']) . "\n";
                }
                break;
            case 'multiple_choice':
                if (!empty($question['options'])) {
                    $prompt .= " Available: " . implode(" | ", $question['options']) . "\n";
                }
                break;
            case 'rating_scale':
                $config = $question['config'];
                $start = isset($config['start']) ? $config['start'] : 1;
                $end = isset($config['end']) ? $config['end'] : 5;
                $prompt .= " Rate from $start to $end\n";
                break;
        }
    }

    $question_count = count($questions);
    $prompt .= "\nIMPORTANT: Answer in this exact format:\n";
    // Only show as many example lines as there are questions, so a one-question
    // survey does not ask the model for an "answer to question 3".
    for ($n = 1; $n <= min(3, $question_count); $n++) {
        $prompt .= "$n. [Your answer to question $n]\n";
    }
    $prompt .= "And so on for all " . $question_count . " questions. Give only the answer, nothing else.";

    return $prompt;
}

/**
 * Coerce whatever GptHelper::makeRequest() returned into a string.
 *
 * Arrays are unwrapped via their 'content' or 'message' key when present and
 * JSON-encoded otherwise; any remaining non-string value is cast or JSON-encoded.
 *
 * @param mixed $gpt_response Raw return value of the GPT helper.
 * @return string The response text ('' when nothing usable was returned).
 */
function gptResponseToString($gpt_response) {
    if (is_array($gpt_response)) {
        if (isset($gpt_response['content'])) {
            $gpt_response = $gpt_response['content'];
        } elseif (isset($gpt_response['message'])) {
            $gpt_response = $gpt_response['message'];
        } else {
            $gpt_response = json_encode($gpt_response);
        }
    }

    if (is_string($gpt_response)) {
        return $gpt_response;
    }
    if ($gpt_response === null || is_scalar($gpt_response)) {
        return (string)$gpt_response;
    }

    // Nested array/object: a plain (string) cast would yield "Array" or throw.
    $encoded = json_encode($gpt_response);
    return $encoded === false ? '' : $encoded;
}

/**
 * Extract answers from the GPT reply, keyed by zero-based question index.
 *
 * Lines of the form "N. answer" / "N) answer" are assigned to question N, so a
 * missing, short ("4") or digit-containing ("25-34") answer can never shift the
 * answers that follow it onto the wrong question. Only when the reply contains
 * no numbered answers at all are unnumbered lines (and, as a last resort,
 * sentence fragments) used positionally.
 *
 * @param string $gpt_response   Response text.
 * @param int    $question_count Number of questions that were asked.
 * @return array Map of question index => answer; indexes may be missing.
 */
function parseGptAnswers($gpt_response, $question_count) {
    $numbered = [];   // answers explicitly labelled with their question number
    $unnumbered = []; // other candidate answer lines, in order of appearance

    foreach (explode("\n", trim($gpt_response)) as $line) {
        $line = trim($line);
        if ($line === '') {
            continue;
        }

        // "(?!\d)" keeps a bare decimal such as "4.5" from being read as
        // "question 4, answer 5"; "1. 4" still parses as question 1, answer "4".
        if (preg_match('/^(\d+)[\.\)]\s*(?!\d)(.*)$/', $line, $m)) {
            $idx = (int)$m[1] - 1;
            $answer = trim($m[2]);
            if ($idx >= 0 && $idx < $question_count && $answer !== '' && !isset($numbered[$idx])) {
                $numbered[$idx] = $answer;
            }
            continue;
        }

        // Strip bullets or letter prefixes from unnumbered lines
        $line = (string)preg_replace('/^([\*\-\+]\s*|[A-Za-z][\.\)]\s*)/', '', $line);

        // Skip lines that look like instructions or formatting. The length guard
        // applies to 'answer' only; the grouping is spelled out explicitly.
        $looks_like_meta = stripos($line, 'based on') !== false
            || stripos($line, 'profile') !== false
            || stripos($line, 'respondent') !== false
            || (stripos($line, 'answer') !== false && strlen($line) < 20);

        if ($line !== '' && !$looks_like_meta) {
            $unnumbered[] = $line;
        }
    }

    if (!empty($numbered)) {
        return $numbered;
    }
    if (count($unnumbered) >= $question_count) {
        return array_slice($unnumbered, 0, $question_count);
    }

    // Last resort: treat reasonably sized sentences as answers.
    $sentences = [];
    foreach (preg_split('/[.!?]+/', $gpt_response) as $sentence) {
        $sentence = trim($sentence);
        $length = strlen($sentence);
        if ($length > 5 && $length < 200
            && !preg_match('/\b(based on|profile|respondent|answer|question)\b/i', $sentence)) {
            $sentences[] = $sentence;
            if (count($sentences) >= $question_count) {
                break;
            }
        }
    }

    return count($sentences) > count($unnumbered) ? $sentences : $unnumbered;
}

try {
    log_message("Connecting to database");
    $db = Database::getInstance();
    log_message("Database connected successfully");

    // Get selection details
    $stmt = $db->prepare("
        SELECT s.*, p.name as project_name
        FROM selections s
        JOIN projects p ON s.project_id = p.id
        WHERE s.id = ?
    ");
    $stmt->bind_param('i', $selection_id);
    $stmt->execute();
    $selection = $stmt->get_result()->fetch_assoc();

    if (!$selection) {
        throw new Exception("Selection not found");
    }
    log_message("Selection found: " . $selection['name']);

    // Get project's connected survey
    $stmt = $db->prepare("
        SELECT s.id as survey_id, s.title as survey_title
        FROM project_surveys ps
        JOIN surveys s ON ps.survey_id = s.id
        WHERE ps.project_id = ?
        LIMIT 1
    ");
    $stmt->bind_param('i', $selection['project_id']);
    $stmt->execute();
    $connected_survey = $stmt->get_result()->fetch_assoc();

    if (!$connected_survey) {
        throw new Exception("No survey connected to this project");
    }
    log_message("Connected survey found: " . $connected_survey['survey_title']);

    // Get survey questions (only answerable questions)
    $stmt = $db->prepare("
        SELECT id, question_text, question_type, options, config
        FROM survey_questions
        WHERE survey_id = ?
          AND question_type NOT IN ('section_header', 'descriptive_text', 'page_break')
        ORDER BY question_order ASC
    ");
    $stmt->bind_param('i', $connected_survey['survey_id']);
    $stmt->execute();
    $questions_result = $stmt->get_result();

    $questions = [];
    while ($question = $questions_result->fetch_assoc()) {
        // Decode the JSON columns; anything that is not a JSON array/object becomes []
        foreach (['options', 'config'] as $json_column) {
            $decoded = $question[$json_column] ? json_decode($question[$json_column], true) : null;
            $question[$json_column] = is_array($decoded) ? $decoded : [];
        }
        $questions[] = $question;
    }

    if (empty($questions)) {
        throw new Exception("No questions found in the connected survey");
    }
    $question_count = count($questions);
    log_message("Found " . $question_count . " questions to process");

    // Get all attributes for reference
    $attributes = [];
    $attr_query = $db->query("SELECT id, name FROM attributes ORDER BY created_at ASC");
    while ($attr = $attr_query->fetch_assoc()) {
        $attributes[$attr['id']] = $attr['name'];
    }

    // Get selection members
    $stmt = $db->prepare("
        SELECT sm.panelist_id, pd.attribute_values
        FROM selection_members sm
        LEFT JOIN panel_data pd ON sm.panelist_id = pd.panelist_id
        WHERE sm.selection_id = ?
        ORDER BY sm.id ASC
    ");
    $stmt->bind_param('i', $selection_id);
    $stmt->execute();
    $members_result = $stmt->get_result();

    $members = [];
    while ($member = $members_result->fetch_assoc()) {
        $members[] = $member;
    }

    if (empty($members)) {
        throw new Exception("No members found in selection");
    }
    log_message("Found " . count($members) . " members to generate responses for");

    // Clear existing responses for this selection
    $stmt = $db->prepare("DELETE FROM synthetic_responses WHERE selection_id = ?");
    $stmt->bind_param('i', $selection_id);
    $stmt->execute();
    log_message("Cleared existing responses for selection");

    $total_members = count($members);
    $processed = 0;
    $success_count = 0;
    $error_count = 0;

    // Update progress with total count
    update_progress($progress_file, 0, 0, 0, 0, $total_members, 'Processing members...');

    // Process each member
    foreach ($members as $index => $member) {
        $member_number = $index + 1;
        log_message("Generating responses for member {$member_number}/{$total_members} (ID: {$member['panelist_id']})");

        update_progress($progress_file, round(($processed / $total_members) * 100), $processed, $success_count, $error_count, $total_members, "Processing member {$member_number}/{$total_members}...");

        try {
            // Extract member attributes for context. attribute_values is NULL when
            // the LEFT JOIN finds no panel_data row, so only decode real strings.
            $raw_attribute_values = $member['attribute_values'];
            $attribute_values = is_string($raw_attribute_values) ? json_decode($raw_attribute_values, true) : null;
            if (!is_array($attribute_values)) {
                $attribute_values = [];
            }

            $profile_data = [];
            foreach ($attribute_values as $attr_id => $value) {
                if (isset($attributes[$attr_id])) {
                    $profile_data[$attributes[$attr_id]] = is_array($value) ? implode(', ', $value) : $value;
                }
            }

            // The request payload does not change between retries, so build it once.
            $messages = [
                ['role' => 'system', 'content' => 'You are a survey respondent. You must respond to exactly ' . $question_count . ' questions. Format each response as: "1. [answer]", "2. [answer]", etc. Give only direct answers - no explanations, introductions, or extra text. Each answer should be realistic and appropriate for the given demographic profile.'],
                ['role' => 'user', 'content' => buildRespondentPrompt($profile_data, $questions)]
            ];

            $responses = [];
            $used_gpt = false; // set once we have a final answer set (GPT or fallback)
            $retry_count = 0;
            $max_retries = 3;

            // Try to use GPT with retry mechanism for rate limits
            while (!$used_gpt && $retry_count < $max_retries) {
                try {
                    $raw_response = GptHelper::makeRequest($messages);
                    log_message("GPT response received for member " . $member_number);
                    log_message("GPT response type: " . gettype($raw_response));

                    $gpt_response = gptResponseToString($raw_response);
                    if (!is_string($raw_response)) {
                        log_message("Converted non-string response to string");
                    }

                    // trim() rather than empty(): a reply of "0" is a valid answer.
                    if (trim($gpt_response) === '') {
                        throw new Exception("Empty GPT response received");
                    }

                    // Log the full GPT response for debugging
                    log_message("Full GPT response: " . substr($gpt_response, 0, 500) . "...");

                    $parsed_answers = parseGptAnswers($gpt_response, $question_count);
                    log_message("Extracted " . count($parsed_answers) . " responses from GPT output");

                    // Ensure we have one answer per question - use smart defaults if needed
                    $responses = [];
                    foreach ($questions as $q_idx => $question) {
                        if (isset($parsed_answers[$q_idx]) && trim($parsed_answers[$q_idx]) !== '') {
                            $responses[] = trim($parsed_answers[$q_idx]);
                        } else {
                            $default_response = generateDefaultResponse($question, $profile_data);
                            $responses[] = $default_response;
                            log_message("Using default response for Q" . ($q_idx + 1) . ": " . $default_response);
                        }
                    }

                    log_message("Final response count: " . count($responses) . " for " . $question_count . " questions");
                    $used_gpt = true;
                } catch (Exception $e) {
                    log_message("GPT error (attempt " . ($retry_count + 1) . "): " . $e->getMessage());
                    $retry_count++;

                    if ($retry_count >= $max_retries) {
                        log_message("Max retries exceeded for member " . $member_number . ", generating fallback responses");

                        // Use the same default generator as the per-question fallback
                        // so both paths produce consistent synthetic answers.
                        $responses = [];
                        foreach ($questions as $question) {
                            $responses[] = generateDefaultResponse($question, $profile_data);
                        }
                        $used_gpt = true; // Stop retrying
                    } else {
                        sleep(2); // Wait 2 seconds before retry
                    }
                }
            }

            // Save responses to database. Prepare once per member instead of once
            // per question; a failure here is handled by the member-level catch.
            $insert_stmt = $db->prepare("
                INSERT INTO synthetic_responses
                    (selection_id, survey_id, member_id, panelist_id, question_id, response_text)
                VALUES (?, ?, ?, ?, ?, ?)
            ");
            if (!$insert_stmt) {
                throw new Exception("Failed to prepare statement: " . $db->getLastError());
            }

            $saved_responses = 0;
            foreach ($questions as $q_index => $question) {
                $response_text = isset($responses[$q_index]) ? $responses[$q_index] : "No response";

                try {
                    // Make sure we're using the correct data types
                    $insert_stmt->bind_param('iiisis',
                        $selection_id,
                        $connected_survey['survey_id'],
                        $member_number,          // member_id (sequential number)
                        $member['panelist_id'],  // panelist_id (the actual ID like SYN013618)
                        $question['id'],
                        $response_text
                    );

                    if ($insert_stmt->execute()) {
                        $saved_responses++;
                        log_message("Saved response for question {$question['id']}: " . substr($response_text, 0, 50) . "...");
                    } else {
                        log_message("Failed to save response for question " . $question['id'] . ": " . $insert_stmt->error);
                    }
                } catch (Exception $e) {
                    log_message("Database error saving response for question " . $question['id'] . ": " . $e->getMessage());
                }
            }

            if ($saved_responses > 0) {
                $success_count++;
                log_message("Successfully saved {$saved_responses} responses for member " . $member_number);
            } else {
                $error_count++;
                log_message("Failed to save any responses for member " . $member_number);
            }
        } catch (Exception $e) {
            $error_count++;
            log_message("Error processing member " . $member_number . ": " . $e->getMessage());
        }

        $processed++;

        // Update progress
        $progress_percentage = round(($processed / $total_members) * 100);
        update_progress($progress_file, $progress_percentage, $processed, $success_count, $error_count, $total_members, "Processed {$processed}/{$total_members} members", $processed >= $total_members);

        // Brief pause to prevent overwhelming the system
        usleep(100000); // 0.1 seconds
    }

    // Final completion
    update_progress($progress_file, 100, $processed, $success_count, $error_count, $total_members, "Completed! Generated responses for {$success_count}/{$total_members} members", true);

    log_message("Response generation completed");
    log_message("Total processed: {$processed}");
    log_message("Successful: {$success_count}");
    log_message("Errors: {$error_count}");
} catch (Exception $e) {
    log_message("Fatal error: " . $e->getMessage());
    update_progress($progress_file, 0, 0, 0, 1, 1, "Error: " . $e->getMessage(), true);
    exit(1);
}

log_message("Worker script completed successfully");
exit(0);
?>