| // SPDX-License-Identifier: Apache-2.0 |
| // ---------------------------------------------------------------------------- |
| // Copyright 2011-2020 Arm Limited |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| // use this file except in compliance with the License. You may obtain a copy |
| // of the License at: |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| // License for the specific language governing permissions and limitations |
| // under the License. |
| // ---------------------------------------------------------------------------- |
| |
| /** |
| * @brief Functions for generating partition tables on demand. |
| */ |
| |
| #include "astc_codec_internals.h" |
| |
| /* |
| Produce a canonicalized representation of a partition pattern |
| |
| The largest possible such representation is 432 bits, equal to 7 uint64_t values. |
| */ |
| static void gen_canonicalized_partition_table( |
| int texel_count, |
| const uint8_t* partition_table, |
| uint64_t canonicalized[7] |
| ) { |
| int i; |
| for (i = 0; i < 7; i++) |
| canonicalized[i] = 0; |
| |
| int mapped_index[4]; |
| int map_weight_count = 0; |
| for (i = 0; i < 4; i++) |
| mapped_index[i] = -1; |
| |
| for (i = 0; i < texel_count; i++) |
| { |
| int index = partition_table[i]; |
| if (mapped_index[index] == -1) |
| mapped_index[index] = map_weight_count++; |
| uint64_t xlat_index = mapped_index[index]; |
| canonicalized[i >> 5] |= xlat_index << (2 * (i & 0x1F)); |
| } |
| } |
| |
| static int compare_canonicalized_partition_tables( |
| const uint64_t part1[7], |
| const uint64_t part2[7] |
| ) { |
| if (part1[0] != part2[0]) |
| return 0; |
| if (part1[1] != part2[1]) |
| return 0; |
| if (part1[2] != part2[2]) |
| return 0; |
| if (part1[3] != part2[3]) |
| return 0; |
| if (part1[4] != part2[4]) |
| return 0; |
| if (part1[5] != part2[5]) |
| return 0; |
| if (part1[6] != part2[6]) |
| return 0; |
| return 1; |
| } |
| |
| /* |
| For a partition table, detect partitionss that are equivalent, then mark them as invalid. This reduces the number of partitions that the codec has to consider and thus improves encode |
| performance. */ |
| static void partition_table_zap_equal_elements( |
| int texel_count, |
| partition_info* pi |
| ) { |
| int partition_tables_zapped = 0; |
| int i, j; |
| uint64_t *canonicalizeds = new uint64_t[PARTITION_COUNT * 7]; |
| |
| |
| for (i = 0; i < PARTITION_COUNT; i++) |
| { |
| gen_canonicalized_partition_table(texel_count, pi[i].partition_of_texel, canonicalizeds + i * 7); |
| } |
| |
| for (i = 0; i < PARTITION_COUNT; i++) |
| { |
| for (j = 0; j < i; j++) |
| { |
| if (compare_canonicalized_partition_tables(canonicalizeds + 7 * i, canonicalizeds + 7 * j)) |
| { |
| pi[i].partition_count = 0; |
| partition_tables_zapped++; |
| break; |
| } |
| } |
| } |
| delete[]canonicalizeds; |
| } |
| |
| static uint32_t hash52(uint32_t inp) |
| { |
| inp ^= inp >> 15; |
| |
| inp *= 0xEEDE0891; // (2^4+1)*(2^7+1)*(2^17-1) |
| inp ^= inp >> 5; |
| inp += inp << 16; |
| inp ^= inp >> 7; |
| inp ^= inp >> 3; |
| inp ^= inp << 6; |
| inp ^= inp >> 17; |
| return inp; |
| } |
| |
| static int select_partition( |
| int seed, |
| int x, |
| int y, |
| int z, |
| int partitioncount, |
| int small_block |
| ) { |
| if (small_block) |
| { |
| x <<= 1; |
| y <<= 1; |
| z <<= 1; |
| } |
| |
| seed += (partitioncount - 1) * 1024; |
| |
| uint32_t rnum = hash52(seed); |
| |
| uint8_t seed1 = rnum & 0xF; |
| uint8_t seed2 = (rnum >> 4) & 0xF; |
| uint8_t seed3 = (rnum >> 8) & 0xF; |
| uint8_t seed4 = (rnum >> 12) & 0xF; |
| uint8_t seed5 = (rnum >> 16) & 0xF; |
| uint8_t seed6 = (rnum >> 20) & 0xF; |
| uint8_t seed7 = (rnum >> 24) & 0xF; |
| uint8_t seed8 = (rnum >> 28) & 0xF; |
| uint8_t seed9 = (rnum >> 18) & 0xF; |
| uint8_t seed10 = (rnum >> 22) & 0xF; |
| uint8_t seed11 = (rnum >> 26) & 0xF; |
| uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; |
| |
| // squaring all the seeds in order to bias their distribution |
| // towards lower values. |
| seed1 *= seed1; |
| seed2 *= seed2; |
| seed3 *= seed3; |
| seed4 *= seed4; |
| seed5 *= seed5; |
| seed6 *= seed6; |
| seed7 *= seed7; |
| seed8 *= seed8; |
| seed9 *= seed9; |
| seed10 *= seed10; |
| seed11 *= seed11; |
| seed12 *= seed12; |
| |
| int sh1, sh2, sh3; |
| if (seed & 1) |
| { |
| sh1 = (seed & 2 ? 4 : 5); |
| sh2 = (partitioncount == 3 ? 6 : 5); |
| } |
| else |
| { |
| sh1 = (partitioncount == 3 ? 6 : 5); |
| sh2 = (seed & 2 ? 4 : 5); |
| } |
| sh3 = (seed & 0x10) ? sh1 : sh2; |
| |
| seed1 >>= sh1; |
| seed2 >>= sh2; |
| seed3 >>= sh1; |
| seed4 >>= sh2; |
| seed5 >>= sh1; |
| seed6 >>= sh2; |
| seed7 >>= sh1; |
| seed8 >>= sh2; |
| |
| seed9 >>= sh3; |
| seed10 >>= sh3; |
| seed11 >>= sh3; |
| seed12 >>= sh3; |
| |
| int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); |
| int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); |
| int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); |
| int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); |
| |
| // apply the saw |
| a &= 0x3F; |
| b &= 0x3F; |
| c &= 0x3F; |
| d &= 0x3F; |
| |
| // remove some of the components if we are to output < 4 partitions. |
| if (partitioncount <= 3) |
| d = 0; |
| if (partitioncount <= 2) |
| c = 0; |
| if (partitioncount <= 1) |
| b = 0; |
| |
| int partition; |
| if (a >= b && a >= c && a >= d) |
| partition = 0; |
| else if (b >= c && b >= d) |
| partition = 1; |
| else if (c >= d) |
| partition = 2; |
| else |
| partition = 3; |
| return partition; |
| } |
| |
| static void generate_one_partition_table( |
| const block_size_descriptor* bsd, |
| int partition_count, |
| int partition_index, |
| partition_info* pt |
| ) { |
| int texels_per_block = bsd->texel_count; |
| int small_block = texels_per_block < 32; |
| |
| uint8_t *partition_of_texel = pt->partition_of_texel; |
| int x, y, z, i; |
| |
| for (z = 0; z < bsd->zdim; z++) |
| for (y = 0; y < bsd->ydim; y++) |
| for (x = 0; x < bsd->xdim; x++) |
| { |
| uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block); |
| *partition_of_texel++ = part; |
| } |
| |
| int counts[4]; |
| for (i = 0; i < 4; i++) |
| counts[i] = 0; |
| |
| for (i = 0; i < texels_per_block; i++) |
| { |
| int partition = pt->partition_of_texel[i]; |
| counts[partition]++; |
| } |
| |
| if (counts[0] == 0) |
| pt->partition_count = 0; |
| else if (counts[1] == 0) |
| pt->partition_count = 1; |
| else if (counts[2] == 0) |
| pt->partition_count = 2; |
| else if (counts[3] == 0) |
| pt->partition_count = 3; |
| else |
| pt->partition_count = 4; |
| } |
| |
| /* Public function, see header file for detailed documentation */ |
| void init_partition_tables( |
| block_size_descriptor* bsd |
| ) { |
| partition_info *par_tab2 = bsd->partitions; |
| partition_info *par_tab3 = par_tab2 + PARTITION_COUNT; |
| partition_info *par_tab4 = par_tab3 + PARTITION_COUNT; |
| partition_info *par_tab1 = par_tab4 + PARTITION_COUNT; |
| |
| generate_one_partition_table(bsd, 1, 0, par_tab1); |
| for (int i = 0; i < 1024; i++) |
| { |
| generate_one_partition_table(bsd, 2, i, par_tab2 + i); |
| generate_one_partition_table(bsd, 3, i, par_tab3 + i); |
| generate_one_partition_table(bsd, 4, i, par_tab4 + i); |
| } |
| |
| partition_table_zap_equal_elements(bsd->texel_count, par_tab2); |
| partition_table_zap_equal_elements(bsd->texel_count, par_tab3); |
| partition_table_zap_equal_elements(bsd->texel_count, par_tab4); |
| } |