import { COLLECTIONS, INDICES } from "../types/utils";
import logger from "../utils/logger";

// Page size for runPaginatedQuery: used as the $limit of the first page and
// of every follow-up batch, and as the multiplier for each batch's $skip.
const BATCH_SIZE = 10000;

/**
 * Runs an aggregation against one of the known collections and returns every
 * resulting document, fetching them in pages of BATCH_SIZE.
 *
 * The first request ends in a `$facet` stage that yields the total number of
 * documents together with the first page. When more documents remain, the
 * other pages are requested concurrently with `$skip`/`$limit` and appended
 * to the first page in page order.
 *
 * NOTE(review): follow-up pages re-run the pipeline independently, so page
 * boundaries are presumably only stable when `sort` produces a deterministic
 * order — confirm for callers that need exact, duplicate-free results.
 * NOTE(review): the whole first page is embedded in a single `$facet` result
 * document; verify that BATCH_SIZE documents stay below MongoDB's per-document
 * size limit.
 *
 * @param {object} realmUser - Object exposing
 *   `mongoClient(name).db(name).collection(name)`.
 * @param {string} collection - Collection name; must be an own key of COLLECTIONS.
 * @param {object|null} [operator=null] - Fields spread into the `$search` stage.
 * @param {object|null} [collector=null] - Used in place of `operator` only when
 *   `operator` is falsy; when both are falsy no `$search` stage is added.
 * @param {object|null} [projection=null] - `$project` specification. Defaults to
 *   including every key found under `COLLECTIONS[collection]`.
 * @param {object|null} [sort=null] - `$sort` specification; no sort stage is
 *   added when falsy.
 * @returns {Promise<Array|null>} All fetched documents; `[]` when the first
 *   aggregation returns nothing; `null` (after logging) when `collection` is
 *   not a known collection.
 * @throws Logs and rethrows any error raised while building or running the
 *   aggregations.
 */
export const runPaginatedQuery = async (
  realmUser,
  collection,
  operator = null,
  collector = null,
  projection = null,
  sort = null
) => {
  // Own-property check: the `in` operator would also accept inherited keys
  // such as "toString" or "constructor", which are not real collections.
  const isKnownCollection =
    Boolean(collection) &&
    Object.prototype.hasOwnProperty.call(COLLECTIONS, collection);

  if (!isKnownCollection) {
    logger.error(
      `Collection '${collection}' doesn't exist in the database. ` +
      "Please choose a valid collection to run your query on."
    );
    return null;
  }

  const collectionRef = realmUser
    .mongoClient("mongodb-atlas")
    .db("123wellness")
    .collection(collection);

  // Without an explicit projection, include exactly the keys declared for
  // this collection in COLLECTIONS.
  const optimizedProjection =
    projection ||
    Object.fromEntries(
      Object.keys(COLLECTIONS[collection]).map((key) => [key, 1])
    );

  try {
    // Stages shared by the first page and every follow-up batch.
    const basePipeline = [];

    // `operator` takes precedence; `collector` is only a fallback.
    const operation = operator || collector;
    if (operation) {
      basePipeline.push({
        $search: {
          index: INDICES[collection],
          ...operation,
        },
      });
    }

    if (sort) {
      basePipeline.push({ $sort: sort });
    }

    basePipeline.push({ $project: optimizedProjection });

    // One round trip for both the total count and the first page.
    const firstPagePipeline = [
      ...basePipeline,
      {
        $facet: {
          metadata: [{ $count: "total" }],
          data: [{ $limit: BATCH_SIZE }],
        },
      },
    ];

    // Disk use is allowed so memory-heavy stages on large datasets are not
    // rejected. NOTE(review): confirm this client forwards the options
    // argument to the server.
    const result = await collectionRef.aggregate(firstPagePipeline, {
      allowDiskUse: true,
    });

    if (!result || !result[0]) {
      return [];
    }

    const { metadata, data } = result[0];
    // `metadata` is empty when nothing matched, because $count emits no document.
    const total = metadata[0]?.total ?? 0;

    // Everything fit in the first page (or there is nothing to page through).
    if (data.length >= total || data.length === 0) {
      return data;
    }

    // Reaching this point means the first page was cut off by $limit, so the
    // follow-up pages start at multiples of BATCH_SIZE.
    const remainingBatches = Math.ceil((total - data.length) / BATCH_SIZE);
    const batchPromises = Array.from(
      { length: remainingBatches },
      (_, index) =>
        collectionRef.aggregate(
          [
            ...basePipeline,
            { $skip: (index + 1) * BATCH_SIZE },
            { $limit: BATCH_SIZE },
          ],
          { allowDiskUse: true }
        )
    );

    // Promise.all preserves request order, so pages are concatenated in order.
    const batchResults = await Promise.all(batchPromises);
    return [...data, ...batchResults.flat()];
  } catch (error) {
    logger.error('Error in runPaginatedQuery:', error);
    throw error;
  }
};
