// blob: 3e4783c56ddac39e1f9477124ad4d7f6374e78a1 [file] [log] [blame]
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2020 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
/**
* @brief Functions for generating partition tables on demand.
*/
#include "astc_codec_internals.h"
/**
 * @brief Produce a canonicalized representation of a partition pattern.
 *
 * Partition labels are renumbered in order of first appearance while walking
 * the texels from index 0 upwards, so two patterns that differ only in how
 * their partitions are numbered yield identical output. Each texel takes
 * 2 bits, packed 32 texels per 64-bit word with the lowest texel index in the
 * lowest bits of each word.
 *
 * The largest possible such representation is 432 bits, which fits in
 * 7 uint64_t values.
 *
 * @param      texel_count      Number of entries of @c partition_table to encode.
 * @param      partition_table  Partition label of each texel. Every label is
 *                              used to index a 4-entry remap table, so it
 *                              must be in the range 0..3.
 * @param[out] canonicalized    The 7-word packed result; all 7 words are
 *                              overwritten.
 */
static void gen_canonicalized_partition_table(
    int texel_count,
    const uint8_t* partition_table,
    uint64_t canonicalized[7]
) {
    // Original label -> canonical label; negative while still unassigned.
    int remap[4] = { -1, -1, -1, -1 };
    int next_label = 0;

    for (int word = 0; word < 7; word++)
    {
        canonicalized[word] = 0;
    }

    for (int texel = 0; texel < texel_count; texel++)
    {
        int label = partition_table[texel];

        // First time this label is seen: hand out the next canonical label.
        if (remap[label] < 0)
        {
            remap[label] = next_label++;
        }

        // Widen before shifting; the shift can reach 62 bits.
        uint64_t bits = (uint64_t)remap[label];
        int word = texel / 32;
        int shift = 2 * (texel % 32);
        canonicalized[word] |= bits << shift;
    }
}
/**
 * @brief Test two canonicalized partition tables for equality.
 *
 * @param part1  First canonicalized table (7 words).
 * @param part2  Second canonicalized table (7 words).
 *
 * @return 1 if all seven words are equal, 0 as soon as any word differs.
 */
static int compare_canonicalized_partition_tables(
    const uint64_t part1[7],
    const uint64_t part2[7]
) {
    for (int word = 0; word < 7; word++)
    {
        if (part1[word] != part2[word])
        {
            return 0;
        }
    }

    return 1;
}
/**
 * @brief For a partition table, detect partitionings that are equivalent and
 *        mark the duplicates as invalid.
 *
 * This reduces the number of partitionings that the codec has to consider and
 * thus improves encode performance.
 *
 * Every entry is reduced to its canonical form, then each entry is compared
 * against all entries with a lower index. The first occurrence of a pattern
 * is left untouched; every later equivalent entry has its partition_count set
 * to 0.
 *
 * @param texel_count  Number of texels per block, i.e. how many entries of
 *                     each partition_of_texel array are compared.
 * @param pi           Array of PARTITION_COUNT partition_info entries;
 *                     partition_count of duplicate entries is overwritten.
 */
static void partition_table_zap_equal_elements(
    int texel_count,
    partition_info* pi
) {
    // 7 words of canonical form per entry, stored back to back.
    uint64_t *canonicalizeds = new uint64_t[PARTITION_COUNT * 7];

    for (int i = 0; i < PARTITION_COUNT; i++)
    {
        gen_canonicalized_partition_table(texel_count, pi[i].partition_of_texel, canonicalizeds + i * 7);
    }

    for (int i = 0; i < PARTITION_COUNT; i++)
    {
        for (int j = 0; j < i; j++)
        {
            if (compare_canonicalized_partition_tables(canonicalizeds + 7 * i, canonicalizeds + 7 * j))
            {
                // Same pattern as an earlier entry: invalidate this one.
                pi[i].partition_count = 0;
                break;
            }
        }
    }

    delete[] canonicalizeds;
}
/**
 * @brief 32-bit integer mixing function, used to scramble a partition seed.
 *
 * A fixed sequence of xor-shift, multiply and add steps. All arithmetic is
 * unsigned and wraps modulo 2^32, and the order of the steps matters.
 *
 * @param inp  Value to hash.
 *
 * @return The mixed 32-bit value.
 */
static uint32_t hash52(uint32_t inp)
{
    uint32_t mixed = inp;

    mixed = mixed ^ (mixed >> 15);
    // Multiplier is the modulo-2^32 negation of (2^4+1)*(2^7+1)*(2^17-1).
    mixed = mixed * 0xEEDE0891u;
    mixed = mixed ^ (mixed >> 5);
    mixed = mixed + (mixed << 16);
    mixed = mixed ^ (mixed >> 7);
    mixed = mixed ^ (mixed >> 3);
    mixed = mixed ^ (mixed << 6);
    mixed = mixed ^ (mixed >> 17);

    return mixed;
}
/**
 * @brief Compute which partition a single texel belongs to for a given seed.
 *
 * The seed is offset by (partitioncount - 1) * 1024 and hashed with hash52().
 * Twelve 4-bit fields of the hash are squared, shifted down and used as
 * coefficients of four linear functions of (x, y, z). Each function is
 * reduced to 6 bits, and the index of the largest one is the partition.
 * Ties go to the lowest index.
 *
 * @param seed            Partition pattern seed.
 * @param x               Texel X coordinate.
 * @param y               Texel Y coordinate.
 * @param z               Texel Z coordinate.
 * @param partitioncount  Number of partitions requested. Functions for
 *                        partitions at or above this count are forced to 0.
 * @param small_block     Non-zero to double all three coordinates first.
 *
 * @return The selected partition index, in the range 0..3.
 */
static int select_partition(
    int seed,
    int x,
    int y,
    int z,
    int partitioncount,
    int small_block
) {
    // Bit offset within the hash of the 4-bit field feeding seed1..seed11.
    // seed12 wraps around both ends of the word and is built separately.
    static const int field_lsb[11] = { 0, 4, 8, 12, 16, 20, 24, 28, 18, 22, 26 };

    if (small_block)
    {
        x <<= 1;
        y <<= 1;
        z <<= 1;
    }

    seed += (partitioncount - 1) * 1024;
    uint32_t rnum = hash52(seed);

    // seeds[k] holds what the reference code calls seed(k+1).
    uint8_t seeds[12];
    for (int k = 0; k < 11; k++)
    {
        seeds[k] = (rnum >> field_lsb[k]) & 0xF;
    }
    seeds[11] = ((rnum >> 30) | (rnum << 2)) & 0xF;

    // Square all the seeds in order to bias their distribution towards
    // lower values.
    for (int k = 0; k < 12; k++)
    {
        seeds[k] *= seeds[k];
    }

    // Two candidate shifts; bit 0 of the seed decides which one is applied
    // to the even-numbered and which to the odd-numbered array slots.
    int seed_shift = (seed & 2) ? 4 : 5;
    int count_shift = (partitioncount == 3) ? 6 : 5;
    int sh1 = (seed & 1) ? seed_shift : count_shift;
    int sh2 = (seed & 1) ? count_shift : seed_shift;
    int sh3 = (seed & 0x10) ? sh1 : sh2;

    for (int k = 0; k < 8; k++)
    {
        seeds[k] >>= (k & 1) ? sh2 : sh1;
    }
    for (int k = 8; k < 12; k++)
    {
        seeds[k] >>= sh3;
    }

    int a = seeds[0] * x + seeds[1] * y + seeds[10] * z + (rnum >> 14);
    int b = seeds[2] * x + seeds[3] * y + seeds[11] * z + (rnum >> 10);
    int c = seeds[4] * x + seeds[5] * y + seeds[8] * z + (rnum >> 6);
    int d = seeds[6] * x + seeds[7] * y + seeds[9] * z + (rnum >> 2);

    // Apply the saw (keep the low 6 bits), and remove some of the components
    // if we are to output < 4 partitions.
    a &= 0x3F;
    b = (partitioncount <= 1) ? 0 : (b & 0x3F);
    c = (partitioncount <= 2) ? 0 : (c & 0x3F);
    d = (partitioncount <= 3) ? 0 : (d & 0x3F);

    // Largest component wins; on a tie the lowest partition index wins.
    if (a >= b && a >= c && a >= d)
    {
        return 0;
    }
    if (b >= c && b >= d)
    {
        return 1;
    }
    if (c >= d)
    {
        return 2;
    }
    return 3;
}
/**
 * @brief Populate one partition table entry for a block size.
 *
 * Every texel of the block is assigned a partition with select_partition(),
 * walking the block with x varying fastest, then y, then z. The entry's
 * partition_count is then set to the index of the first partition that
 * received no texels, or to 4 if all four partitions are populated.
 *
 * @param      bsd              Block size descriptor; xdim, ydim, zdim and
 *                              texel_count are read.
 * @param      partition_count  Number of partitions requested.
 * @param      partition_index  Seed of the partition pattern.
 * @param[out] pt               Entry to fill; partition_of_texel and
 *                              partition_count are written.
 */
static void generate_one_partition_table(
    const block_size_descriptor* bsd,
    int partition_count,
    int partition_index,
    partition_info* pt
) {
    int texels_per_block = bsd->texel_count;
    int small_block = texels_per_block < 32;

    // Assign a partition to every texel.
    uint8_t *next_texel = pt->partition_of_texel;
    for (int z = 0; z < bsd->zdim; z++)
    {
        for (int y = 0; y < bsd->ydim; y++)
        {
            for (int x = 0; x < bsd->xdim; x++)
            {
                uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block);
                *next_texel++ = part;
            }
        }
    }

    // Count how many texels landed in each partition.
    int counts[4] = { 0, 0, 0, 0 };
    for (int texel = 0; texel < texels_per_block; texel++)
    {
        counts[pt->partition_of_texel[texel]]++;
    }

    // The effective partition count is the index of the first empty
    // partition, or 4 when none is empty.
    int populated = 0;
    while (populated < 4 && counts[populated] != 0)
    {
        populated++;
    }
    pt->partition_count = populated;
}
/* Public function, see header file for detailed documentation */
/*
	Fills bsd->partitions, which is laid out as three consecutive tables of
	PARTITION_COUNT entries each (2, 3 and 4 partitions) followed by a single
	entry for the 1-partition case. The array must therefore hold at least
	3 * PARTITION_COUNT + 1 entries.
*/
void init_partition_tables(
    block_size_descriptor* bsd
) {
    partition_info *par_tab2 = bsd->partitions;
    partition_info *par_tab3 = par_tab2 + PARTITION_COUNT;
    partition_info *par_tab4 = par_tab3 + PARTITION_COUNT;
    partition_info *par_tab1 = par_tab4 + PARTITION_COUNT;

    generate_one_partition_table(bsd, 1, 0, par_tab1);

    // Generate exactly as many entries per table as the table offsets above
    // and the duplicate-removal pass below assume.
    // NOTE(review): select_partition() strides the seed space by a literal
    // 1024 per partition count, so PARTITION_COUNT is presumably 1024 - confirm.
    for (int i = 0; i < PARTITION_COUNT; i++)
    {
        generate_one_partition_table(bsd, 2, i, par_tab2 + i);
        generate_one_partition_table(bsd, 3, i, par_tab3 + i);
        generate_one_partition_table(bsd, 4, i, par_tab4 + i);
    }

    // Invalidate entries that duplicate an earlier entry of the same table.
    partition_table_zap_equal_elements(bsd->texel_count, par_tab2);
    partition_table_zap_equal_elements(bsd->texel_count, par_tab3);
    partition_table_zap_equal_elements(bsd->texel_count, par_tab4);
}