def split_data_into_parts(total_data_count, num_parts=4, percentage=0.05):
    """
    Split the leading indices of a dataset into consecutive, equal-sized parts.

    Each part holds ``int(total_data_count * percentage)`` consecutive indices.
    Part ``i`` covers ``[i * size, (i + 1) * size)``, so parts never overlap.
    Indices beyond ``num_parts * size`` are not assigned to any part.

    Args:
        total_data_count (int): The total number of data points.
        num_parts (int): The number of parts to produce (default is 4).
        percentage (float): The fraction of the data each part should
            contain (default is 0.05, i.e. 5%).

    Returns:
        List[List[int]]: A list of ``num_parts`` lists, where each inner list
        contains the indices for one part.

    Raises:
        ValueError: If ``total_data_count`` or ``percentage`` is negative, or
            if ``num_parts`` parts of the requested size do not fit into
            ``total_data_count``.
    """
    # Negative sizes would otherwise produce meaningless slices instead of
    # failing loudly.
    if total_data_count < 0 or percentage < 0:
        raise ValueError("total_data_count and percentage must be non-negative.")

    # Calculate the number of elements in each part (truncated toward zero).
    num_elements_per_part = int(total_data_count * percentage)

    # Ensure that we have enough data to split into the desired number of parts.
    if num_elements_per_part * num_parts > total_data_count:
        raise ValueError("Not enough data to split into the specified number of parts with the given percentage.")

    # Build each part directly from its index range; there is no need to
    # materialize a list of every index first.
    return [
        list(range(i * num_elements_per_part, (i + 1) * num_elements_per_part))
        for i in range(num_parts)
    ]


def get_percentage_segment(index, total, percentage=0.05):
    """
    Return the indices of the ``index``-th fixed-size segment of the data.

    The segment length is ``int(total * percentage)``, but never less than 1.

    Args:
        index (int): Zero-based segment number.
        total (int): The total number of data points.
        percentage (float): The fraction of the data in one segment
            (default is 0.05, i.e. 5%).

    Returns:
        List[int]: The indices in the requested segment. The segment is
        clipped at ``total``; it is empty when the segment starts at or
        beyond ``total``.
    """
    # Length of one segment; at least one element even for tiny datasets.
    segment_size = max(1, int(total * percentage))

    # Start and end index of the requested segment.
    start = index * segment_size
    # Make sure the end index does not run past the total.
    end = min(start + segment_size, total)

    return list(range(start, end))


def find_index_in_parts(parts, index):
    """
    Find the part containing the given index.

    Args:
        parts (List[List[int]]): A list of parts, where each part is a list of indices.
        index (int): The index to search for.

    Returns:
        Tuple[bool, int]: A tuple containing a boolean indicating if the index
        is found, and the position of the first part containing it if found,
        otherwise -1.
    """
    for i, part in enumerate(parts):
        if index in part:
            return True, i
    return False, -1


# Example usage
# NOTE(review): this demo runs whenever the module is executed or imported;
# consider moving it under an `if __name__ == "__main__":` guard if the
# functions above are imported from elsewhere.
total_data_count = 1000  # Example total number of data points
parts = split_data_into_parts(total_data_count)

# Check if index 123 is in any of the parts
index_to_find = 123
found, part_index = find_index_in_parts(parts, index_to_find)

for part in parts:
    print(part)

if found:
    print(f"Index {index_to_find} is in part {part_index + 1}")
else:
    print(f"Index {index_to_find} is not in any of the parts")

print(get_percentage_segment(1, 200))