123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687 |
def split_data_into_parts(total_data_count: int, num_parts: int = 4, percentage: float = 0.05) -> list:
    """
    Split the leading indices of a dataset into equally sized, non-overlapping parts.

    Each part holds ``int(total_data_count * percentage)`` consecutive indices.
    Part ``i`` begins where part ``i - 1`` ends, so no index appears in more
    than one part. Indices past ``num_parts * part_size`` are not assigned to
    any part.

    Args:
        total_data_count (int): The total number of data points.
        num_parts (int): The number of parts to produce (default is 4).
        percentage (float): The fraction of the data points each part should
            contain (default is 0.05).

    Returns:
        List[List[int]]: A list of lists, where each inner list contains the
        indices for one part.

    Raises:
        ValueError: If the computed part size is negative, or if the parts
            together would need more than ``total_data_count`` indices.
    """
    # Truncates toward zero, so a fractional part size is rounded down.
    part_size = int(total_data_count * percentage)

    # A negative size would slip past the capacity check below and produce
    # overlapping, wrongly sized parts, so reject it explicitly.
    if part_size < 0:
        raise ValueError("percentage and total_data_count must not produce a negative part size.")

    # Ensure that we have enough data to split into the desired number of parts
    if part_size * num_parts > total_data_count:
        raise ValueError("Not enough data to split into the specified number of parts with the given percentage.")

    # Build each part straight from its bounds; no need to materialise every
    # index of the dataset first.
    return [
        list(range(i * part_size, (i + 1) * part_size))
        for i in range(num_parts)
    ]
def get_percentage_segment(index, total, percentage=0.05):
    """
    Return the indices of the ``index``-th fixed-size segment of ``total`` items.

    Args:
        index (int): Zero-based position of the segment to return.
        total (int): The total number of data points.
        percentage (float): The fraction of ``total`` that one segment covers
            (default is 0.05, i.e. 5% of the data).

    Returns:
        List[int]: The consecutive indices of the requested segment. The list
        is shorter than a full segment when it runs into ``total``, and empty
        when the segment starts at or beyond ``total``.
    """
    # Length of one segment; never less than one element so that small
    # totals still yield non-empty segments.
    segment_size = max(1, int(total * percentage))

    # Start and end bounds of the requested segment.
    start = index * segment_size
    # Make sure the end bound does not run past the total.
    end = min(start + segment_size, total)

    # Index list for the requested segment.
    return list(range(start, end))
def find_index_in_parts(parts, index):
    """
    Locate the first part that contains the given index.

    Args:
        parts (List[List[int]]): A list of parts, where each part is a list of indices.
        index (int): The index to search for.

    Returns:
        Tuple[bool, int]: ``(True, position)`` with the zero-based position of
        the first part holding ``index``, or ``(False, -1)`` when no part
        contains it.
    """
    # enumerate() only yields positions >= 0, so -1 is a safe "not found" marker.
    position = next(
        (pos for pos, members in enumerate(parts) if index in members),
        -1,
    )
    return position != -1, position
# Example usage: split 1000 data points with the default settings, then
# look up which part a given index landed in.
total_data_count = 1000  # Example total number of data points
parts = split_data_into_parts(total_data_count)

# Look up index 123 among the generated parts.
index_to_find = 123
found, part_index = find_index_in_parts(parts, index_to_find)

# Dump every part, one per line.
for part in parts:
    print(part)

# Parts are reported 1-based to the reader, hence the "+ 1".
print(
    f"Index {index_to_find} is in part {part_index + 1}"
    if found
    else f"Index {index_to_find} is not in any of the parts"
)

# Second segment (index 1) of a 200-item dataset.
print(get_percentage_segment(1, 200))
|