#include #include #include #include #include #include "test_utils.h" static std::string path_to_test_name(::testing::TestParamInfo> param_info) { std::string name = param_info.param.first; std::replace(name.begin(), name.end(), '/', '_'); std::replace(name.begin(), name.end(), '-', '_'); return name; } static std::string beam_to_test_name(::testing::TestParamInfo param_info) { if (param_info.param == 1) return "GreedySearch"; else return "BeamSearch"; } static void check_weights_dtype(const std::unordered_map& variables, DataType expected_dtype) { for (const auto& variable : variables) { const auto& name = variable.first; const auto& value = variable.second; if (ends_with(name, "weight")) { EXPECT_EQ(value.dtype(), expected_dtype) << "Expected type " << dtype_name(expected_dtype) << " for weight " << name << ", got " << dtype_name(value.dtype()) << " instead"; } } } static DataType dtype_with_fallback(DataType dtype, Device device) { const bool support_int8 = mayiuse_int8(device); const bool support_int16 = mayiuse_int16(device); if (dtype == DataType::INT16 && !support_int16) return support_int8 ? DataType::INT8 : DataType::FLOAT32; if (dtype == DataType::INT8 && !support_int8) return support_int16 ? DataType::INT16 : DataType::FLOAT32; return dtype; } // Test that we can load and translate with different versions of the same model. class ModelVariantTest : public ::testing::TestWithParam> { }; TEST_P(ModelVariantTest, Transliteration) { auto params = GetParam(); const std::string model_path = get_data_dir() + "/models/" + params.first; const DataType model_dtype = params.second; const Device device = Device::CPU; std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; std::vector expected = {"a", "t", "z", "m", "o", "n"}; std::vector> type_params; type_params.emplace_back(ComputeType::DEFAULT, dtype_with_fallback(model_dtype, device)); type_params.emplace_back(ComputeType::FLOAT32, DataType::FLOAT32); if (mayiuse_int16(device)) type_params.emplace_back(ComputeType::INT16, DataType::INT16); if (mayiuse_int8(device)) { type_params.emplace_back(ComputeType::INT8, DataType::INT8); type_params.emplace_back(ComputeType::AUTO, DataType::INT8); } else if (mayiuse_int16(device)) { type_params.emplace_back(ComputeType::AUTO, DataType::INT16); } else { type_params.emplace_back(ComputeType::AUTO, DataType::FLOAT32); } for (const auto& types : type_params) { const ComputeType compute_type = types.first; const DataType expected_type = types.second; const auto model = models::Model::load(model_path, device, 0, compute_type); check_weights_dtype(model->get_variables(), expected_type); Translator translator(model); auto result = translator.translate_batch({input})[0]; EXPECT_EQ(result.output(), expected); } } INSTANTIATE_TEST_SUITE_P( TranslatorTest, ModelVariantTest, ::testing::Values( std::make_pair("v1/aren-transliteration", DataType::FLOAT32), std::make_pair("v1/aren-transliteration-i16", DataType::INT16), std::make_pair("v2/aren-transliteration", DataType::FLOAT32), std::make_pair("v2/aren-transliteration-i16", DataType::INT16), std::make_pair("v2/aren-transliteration-i8", DataType::INT8) ), path_to_test_name); class SearchVariantTest : public ::testing::TestWithParam { }; static Translator default_translator(Device device = Device::CPU) { return Translator(default_model_dir(), device); } TEST_P(SearchVariantTest, SetMaxDecodingLength) { Translator translator = default_translator(); TranslationOptions options; options.beam_size = GetParam(); options.max_decoding_length = 3; std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; auto result = translator.translate_batch({input}, options)[0]; EXPECT_EQ(result.output().size(), options.max_decoding_length); } TEST_P(SearchVariantTest, SetMinDecodingLength) { Translator translator = default_translator(); TranslationOptions options; options.beam_size = GetParam(); options.min_decoding_length = 8; std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; auto result = translator.translate_batch({input}, options)[0]; EXPECT_EQ(result.output().size(), options.min_decoding_length); } TEST_P(SearchVariantTest, SetMaxInputLength) { Translator translator = default_translator(); TranslationOptions options; options.beam_size = GetParam(); options.max_input_length = 3; options.return_attention = true; const std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; auto result = translator.translate_batch({input}, options)[0]; EXPECT_EQ(result.hypotheses[0].size(), options.max_input_length); // Check that attention vectors have the size of the original input. ASSERT_TRUE(result.has_attention()); for (size_t i = 0; i < result.attention[0].size(); ++i) { ASSERT_EQ(result.attention[0][i].size(), input.size()); for (size_t t = options.max_input_length; t < input.size(); ++t) { EXPECT_EQ(result.attention[0][i][t], 0); } } } TEST_P(SearchVariantTest, ReturnAllHypotheses) { auto beam_size = GetParam(); Translator translator = default_translator(); TranslationOptions options; options.beam_size = beam_size; options.num_hypotheses = beam_size; std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; auto result = translator.translate_batch({input}, options)[0]; EXPECT_EQ(result.num_hypotheses(), beam_size); } TEST_P(SearchVariantTest, ReturnAttention) { auto beam_size = GetParam(); Translator translator = default_translator(); TranslationOptions options; options.beam_size = beam_size; options.num_hypotheses = beam_size; options.return_attention = true; const std::vector> inputs = { {"آ", "ز", "ا"}, {"آ", "ت", "ز", "م", "و", "ن"} }; const std::vector> expected_shapes = { {4, 3}, {6, 6} }; // (target_length, source_length) const auto results = translator.translate_batch(inputs, options); for (size_t i = 0; i < inputs.size(); ++i) { const TranslationResult& result = results[i]; const auto& expected_shape = expected_shapes[i]; ASSERT_TRUE(result.has_attention()); const auto& attention = result.attention; EXPECT_EQ(attention.size(), beam_size); EXPECT_EQ(attention[0].size(), expected_shape.first); for (const auto& vector : attention[0]) { EXPECT_EQ(vector.size(), expected_shape.second); } } } TEST_P(SearchVariantTest, ReturnAttentionWithPrefix) { const auto beam_size = GetParam(); Translator translator = default_translator(); TranslationOptions options; options.beam_size = beam_size; options.num_hypotheses = beam_size; options.return_attention = true; std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; std::vector prefix = {"", "t"}; std::vector expected = {"", "t", "z", "m", "o", "n" }; auto result = translator.translate_batch({input}, {prefix}, options)[0]; EXPECT_EQ(result.output(), expected); EXPECT_TRUE(result.has_attention()); for (const auto& vector : result.attention[0]) { EXPECT_EQ(vector.size(), input.size()); } } TEST_P(SearchVariantTest, ReturnEndToken) { auto beam_size = GetParam(); Translator translator = default_translator(); TranslationOptions options; options.beam_size = beam_size; options.return_end_token = true; const std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; const std::vector expected = {"a", "t", "z", "m", "o", "n", ""}; const auto result = translator.translate_batch({input}, options)[0]; EXPECT_EQ(result.hypotheses[0], expected); } TEST_P(SearchVariantTest, TranslateWithPrefix) { const auto beam_size = GetParam(); Translator translator = default_translator(); TranslationOptions options; options.beam_size = beam_size; options.num_hypotheses = beam_size; options.return_attention = true; std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; std::vector prefix = {"a", "t", "s"}; std::vector expected = {"a", "t", "s", "u", "m", "o", "n"}; auto result = translator.translate_batch({input}, {prefix}, options)[0]; EXPECT_EQ(result.num_hypotheses(), beam_size); EXPECT_EQ(result.output(), expected); ASSERT_TRUE(result.has_attention()); const auto& attention = result.attention; EXPECT_EQ(attention.size(), options.beam_size); EXPECT_EQ(attention[0].size(), 7); EXPECT_EQ(attention[0][0].size(), 6); } TEST_P(SearchVariantTest, TranslateBatch) { Translator translator = default_translator(); TranslationOptions options; options.return_scores = true; options.beam_size = GetParam(); std::vector> inputs = { {"آ", "ز", "ا"}, {"آ", "ت", "ز", "م", "و", "ن"}}; std::vector> expected = { {"a", "z", "z", "a"}, {"a", "t", "z", "m", "o", "n"}}; auto result = translator.translate_batch(inputs, options); EXPECT_TRUE(result[0].has_scores()); EXPECT_TRUE(result[1].has_scores()); EXPECT_EQ(result[0].output(), expected[0]); EXPECT_EQ(result[1].output(), expected[1]); } TEST_P(SearchVariantTest, SuppressSequences) { const auto beam_size = GetParam(); Translator translator = default_translator(); TranslationOptions options; options.beam_size = beam_size; options.suppress_sequences = {{"o"}, {"t", "z", "m"}}; std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; std::vector expected = {"a", "t", "z", "u", "m", "u", "n"}; auto result = translator.translate_batch({input}, options)[0]; EXPECT_EQ(result.output(), expected); } TEST_P(SearchVariantTest, SuppressSequenceOOV) { const auto beam_size = GetParam(); Translator translator = default_translator(); TranslationOptions options; options.beam_size = beam_size; options.suppress_sequences = {{"o"}, {"t", "oovtoken", "m"}}; std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; EXPECT_THROW(translator.translate_batch({input}, options), std::invalid_argument); } TEST_P(SearchVariantTest, EndToken) { const auto beam_size = GetParam(); Translator translator = default_translator(); TranslationOptions options; options.beam_size = beam_size; options.end_token = "m"; std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; std::vector expected = {"a", "t", "z"}; auto result = translator.translate_batch({input}, options)[0]; EXPECT_EQ(result.output(), expected); } TEST_P(SearchVariantTest, EndTokenOOV) { const auto beam_size = GetParam(); Translator translator = default_translator(); TranslationOptions options; options.beam_size = beam_size; options.end_token = "oovtoken"; std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; EXPECT_THROW(translator.translate_batch({input}, options), std::invalid_argument); } TEST_P(SearchVariantTest, ReplaceUnknowns) { const auto beam_size = GetParam(); Translator translator = default_translator(); TranslationOptions options; options.beam_size = beam_size; options.num_hypotheses = beam_size; options.replace_unknowns = true; std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; std::vector prefix = {"", "t"}; std::vector expected = {"ت", "t", "z", "m", "o", "n" }; auto result = translator.translate_batch({input}, {prefix}, options)[0]; EXPECT_EQ(result.output(), expected); } TEST_P(SearchVariantTest, RepetitionPenalty) { const auto beam_size = GetParam(); Translator translator = default_translator(); TranslationOptions options; options.beam_size = beam_size; options.repetition_penalty = 100; // Force the decoding to produce unique symbols. const auto result = translator.translate_batch({{"ن", "ن", "ن", "ن", "ن"}}, options)[0]; const auto& tokens = result.output(); const std::unordered_set unique_tokens(tokens.begin(), tokens.end()); EXPECT_EQ(unique_tokens.size(), tokens.size()); } TEST_P(SearchVariantTest, NoRepeatNgram) { const auto beam_size = GetParam(); Translator translator = default_translator(); TranslationOptions options; options.beam_size = beam_size; options.no_repeat_ngram_size = 3; const std::vector input(50, "ن"); const auto result = translator.translate_batch({input}, options)[0]; const auto output = join_string(result.output()); std::unordered_set ngrams; for (size_t i = 0; i < output.size() - options.no_repeat_ngram_size; ++i) ngrams.emplace(output.substr(i, options.no_repeat_ngram_size)); EXPECT_EQ(ngrams.size(), output.size() - options.no_repeat_ngram_size); } static void check_normalized_score(const std::vector& input, TranslationOptions options, bool output_has_eos = true) { Translator translator = default_translator(); options.return_scores = true; options.length_penalty = 0; const auto score = translator.translate_batch({input}, options)[0].scores[0]; options.length_penalty = 1; const auto normalized_result = translator.translate_batch({input}, options)[0]; const auto normalized_score = normalized_result.scores[0]; auto normalized_length = normalized_result.hypotheses[0].size(); if (output_has_eos) normalized_length += 1; EXPECT_NEAR(normalized_score, score / normalized_length, 1e-6); } TEST_P(SearchVariantTest, NormalizeScores) { TranslationOptions options; options.beam_size = GetParam(); check_normalized_score({"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}, options); } TEST_P(SearchVariantTest, NormalizeScoresNoEos) { TranslationOptions options; options.beam_size = GetParam(); options.max_decoding_length = 6; check_normalized_score({"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}, options, false); } INSTANTIATE_TEST_SUITE_P( TranslatorTest, SearchVariantTest, ::testing::Values(1, 4), beam_to_test_name); TEST(TranslatorTest, TranslateEmptyBatch) { Translator translator = default_translator(); std::vector> inputs; auto results = translator.translate_batch(inputs); EXPECT_TRUE(results.empty()); } static void check_empty_result(const TranslationResult& result, size_t num_hypotheses = 1, bool with_attention = false, bool with_score = false) { EXPECT_TRUE(result.output().empty()); EXPECT_EQ(result.num_hypotheses(), num_hypotheses); EXPECT_EQ(result.hypotheses.size(), num_hypotheses); EXPECT_EQ(result.has_scores(), with_score); if (with_score) { EXPECT_EQ(result.scores.size(), num_hypotheses); EXPECT_EQ(result.score(), 0); for (const auto score : result.scores) { EXPECT_EQ(score, 0); } } EXPECT_EQ(result.has_attention(), with_attention); if (with_attention) { const auto& attention = result.attention; EXPECT_EQ(attention.size(), num_hypotheses); EXPECT_TRUE(attention[0].empty()); } } TEST(TranslatorTest, TranslateBatchWithEmptySource) { Translator translator = default_translator(); std::vector> inputs = { {}, {"آ", "ز", "ا"}, {}, {"آ", "ت", "ز", "م", "و", "ن"}, {}}; auto results = translator.translate_batch(inputs); EXPECT_EQ(results.size(), 5); check_empty_result(results[0]); EXPECT_EQ(results[1].output(), (std::vector{"a", "z", "z", "a"})); check_empty_result(results[2]); EXPECT_EQ(results[3].output(), (std::vector{"a", "t", "z", "m", "o", "n"})); check_empty_result(results[4]); } TEST(TranslatorTest, TranslateBatchWithOnlyEmptySource) { Translator translator = default_translator(); std::vector> inputs{{}, {}}; auto results = translator.translate_batch(inputs); EXPECT_EQ(results.size(), 2); check_empty_result(results[0]); check_empty_result(results[1]); } TEST(TranslatorTest, TranslateEmptySourceWithoutScore) { Translator translator = default_translator(); TranslationOptions options; options.return_scores = false; EXPECT_FALSE(translator.translate_batch({{}}, options)[0].has_scores()); } TEST(TranslatorTest, TranslateBatchWithHardPrefixAndEmpty) { Translator translator = default_translator(); const TranslationOptions options; const std::vector> input = { {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}, {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}, {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}, {}, {"آ" ,"ز" ,"ا"}}; const std::vector> prefix = { {"a", "t", "s"}, {}, {"a", "t", "z", "o"}, {}, {}}; const auto result = translator.translate_batch(input, prefix, options); EXPECT_EQ(result[0].output(), (std::vector{"a", "t", "s", "u", "m", "o", "n"})); EXPECT_EQ(result[1].output(), (std::vector{"a", "t", "z", "m", "o", "n"})); EXPECT_EQ(result[2].output(), (std::vector{"a", "t", "z", "o", "m", "o", "n"})); EXPECT_TRUE(result[3].output().empty()); EXPECT_EQ(result[4].output(), (std::vector{"a", "z", "z", "a"})); } TEST(TranslatorTest, TranslateBatchWithStronglyBiasedPrefix) { // This test should produce the same results as TranslateBatchWithHardPrefixAndEmpty // because prefix_bias_beta is set to 0.99, which is almost equivalent to using a hard prefix. Translator translator = default_translator(); TranslationOptions options; options.prefix_bias_beta = 0.99; options.beam_size = 2; const std::vector> input = { {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}, {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}, {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}, {"آ" ,"ز" ,"ا"}}; const std::vector> prefix = { {"a", "t", "s"}, {}, {"a", "t", "z", "o"}, {}}; const auto result = translator.translate_batch(input, prefix, options); EXPECT_EQ(result[0].output(), (std::vector{"a", "t", "s", "u", "m", "o", "n"})); EXPECT_EQ(result[1].output(), (std::vector{"a", "t", "z", "m", "o", "n"})); EXPECT_EQ(result[2].output(), (std::vector{"a", "t", "z", "o", "m", "o", "n"})); EXPECT_EQ(result[3].output(), (std::vector{"a", "z", "z", "a"})); } TEST(TranslatorTest, TranslateBatchWithWeaklyBiasedPrefix) { Translator translator = default_translator(); TranslationOptions options; options.prefix_bias_beta = 0.01; options.beam_size = 2; const std::vector> input = { {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}, {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}, {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}, {"آ" ,"ز" ,"ا"} }; const std::vector> prefix = { {"a", "t", "s", "s", "s"}, // Test divergence at divergence first 's' {}, {"a", "t", "z", "o"}, {} }; const auto result = translator.translate_batch(input, prefix, options); EXPECT_EQ(result[0].output(), (std::vector{"a", "t", "z", "m", "o", "n"})); EXPECT_EQ(result[1].output(), (std::vector{"a", "t", "z", "m", "o", "n"})); EXPECT_EQ(result[2].output(), (std::vector{"a", "t", "z", "m", "o", "n"})); EXPECT_EQ(result[3].output(), (std::vector{"a", "z", "z", "a"})); } class BiasedDecodingDeviceFPTest : public ::testing::TestWithParam> { }; TEST_P(BiasedDecodingDeviceFPTest, OneBatchOneBeam) { const Device device = GetParam().first; const DataType dtype = GetParam().second; const dim_t vocab_size = 2; const dim_t batch_size = 1; const dim_t beam_size = 1; const float prefix_bias_beta = 0.35; StorageView logits({batch_size * beam_size, 1, vocab_size}, std::vector{4, 6}); StorageView softmax; ops::SoftMax()(logits, softmax); std::vector expected_log_probs_vec = { std::log((1-prefix_bias_beta) * softmax.at(0) + prefix_bias_beta), std::log((1-prefix_bias_beta) * softmax.at(1)), }; StorageView expected_log_probs(logits.shape(), expected_log_probs_vec, device); StorageView log_probs(device, dtype); const size_t step = 0; const std::vector> beams_diverged_from_prefix = {{false}}; const std::vector batch_offset = {0}; const std::vector> prefix_ids = {{0}}; ctranslate2::BiasedDecoder biased_decoder(prefix_bias_beta, prefix_ids); biased_decoder.decode(batch_size, step, batch_offset, beams_diverged_from_prefix, logits.to(device).to(dtype), log_probs); expect_storage_eq(log_probs.to_float32(), expected_log_probs, 0.01); } TEST_P(BiasedDecodingDeviceFPTest, TwoBatchesTwoBeams) { const Device device = GetParam().first; const DataType dtype = GetParam().second; const dim_t vocab_size = 2; const dim_t batch_size = 2; const dim_t beam_size = 2; const float prefix_bias_beta = 0.35; StorageView logits({batch_size * beam_size, 1, vocab_size}, std::vector{ 4, 6, // batch1 beam1 7, 3, // batch1 beam2 1, 9, // batch2 beam1 8, 2 // batch2 beam2 }); StorageView softmax; ops::SoftMax()(logits, softmax); const std::vector> prefix_ids = { {0}, // bias batch 1 towards token0 {1}}; // bias batch 2 towards token1 std::vector expected_log_probs_vec = { //batch1 beam1 std::log((1-prefix_bias_beta) * softmax.at(0) + prefix_bias_beta), std::log((1-prefix_bias_beta) * softmax.at(1)), //batch1 beam2 std::log((1-prefix_bias_beta) * softmax.at(2) + prefix_bias_beta), std::log((1-prefix_bias_beta) * softmax.at(3)), //batch2 beam1 std::log((1-prefix_bias_beta) * softmax.at(4)), std::log((1-prefix_bias_beta) * softmax.at(5) + prefix_bias_beta), //batch2 beam2 std::log((1-prefix_bias_beta) * softmax.at(6)), std::log((1-prefix_bias_beta) * softmax.at(7) + prefix_bias_beta), }; StorageView expected_log_probs(logits.shape(), expected_log_probs_vec, device); StorageView log_probs(dtype, device); const size_t step = 0; const std::vector> beams_diverged_from_prefix = {{false, false}, {false, false}}; const std::vector batch_offset = {0, 1}; BiasedDecoder biased_decoder(prefix_bias_beta, prefix_ids); biased_decoder.decode(batch_size, step, batch_offset, beams_diverged_from_prefix, logits.to(device).to(dtype), log_probs); expect_storage_eq(log_probs.to_float32(), expected_log_probs, 0.01); } TEST_P(BiasedDecodingDeviceFPTest, BeamDiverged) { const Device device = GetParam().first; const DataType dtype = GetParam().second; const dim_t vocab_size = 2; const dim_t batch_size = 1; const dim_t beam_size = 1; const float prefix_bias_beta = 0.35; StorageView logits({batch_size * beam_size, 1, vocab_size}, std::vector{4, 6}, device); StorageView expected_log_probs(device); ops::LogSoftMax()(logits, expected_log_probs); StorageView log_probs(dtype, device); const size_t step = 0; const std::vector> beams_diverged_from_prefix = {{true}}; const std::vector batch_offset = {0}; const std::vector> prefix_ids = {{0}}; BiasedDecoder biased_decoder(prefix_bias_beta, prefix_ids); biased_decoder.decode(batch_size, step, batch_offset, beams_diverged_from_prefix, logits.to(dtype), log_probs); expect_storage_eq(log_probs.to_float32(), expected_log_probs, 0.01); } TEST_P(BiasedDecodingDeviceFPTest, TimeStepPastPrefix) { const Device device = GetParam().first; const DataType dtype = GetParam().second; const dim_t vocab_size = 2; const dim_t batch_size = 1; const dim_t beam_size = 1; const float prefix_bias_beta = 0.35; StorageView logits({batch_size * beam_size, 1, vocab_size}, std::vector{4, 6}, device); StorageView expected_log_probs(device); ops::LogSoftMax()(logits, expected_log_probs); StorageView log_probs(dtype, device); const size_t step = 1; const std::vector> beams_diverged_from_prefix = {{false}}; const std::vector batch_offset = {0}; const std::vector> prefix_ids = {{0}}; BiasedDecoder biased_decoder(prefix_bias_beta, prefix_ids); biased_decoder.decode(batch_size, step, batch_offset, beams_diverged_from_prefix, logits.to(dtype), log_probs); expect_storage_eq(log_probs.to_float32(), expected_log_probs, 0.01); } TEST_P(BiasedDecodingDeviceFPTest, NonZeroTimestepBias) { const Device device = GetParam().first; const DataType dtype = GetParam().second; const dim_t vocab_size = 2; const dim_t batch_size = 1; const dim_t beam_size = 1; const float prefix_bias_beta = 0.35; StorageView logits({batch_size * beam_size, 1, vocab_size}, std::vector{4, 6}); StorageView softmax; ops::SoftMax()(logits, softmax); std::vector expected_log_probs_vec = { std::log((1-prefix_bias_beta) * softmax.at(0)), std::log((1-prefix_bias_beta) * softmax.at(1) + prefix_bias_beta), }; StorageView expected_log_probs(logits.shape(), expected_log_probs_vec, device); StorageView log_probs(dtype, device); const size_t step = 1; const std::vector> beams_diverged_from_prefix = {{false}}; const std::vector batch_offset = {0}; const std::vector> prefix_ids = {{0, 1, 0}}; BiasedDecoder biased_decoder(prefix_bias_beta, prefix_ids); biased_decoder.decode(batch_size, step, batch_offset, beams_diverged_from_prefix, logits.to(device).to(dtype), log_probs); expect_storage_eq(log_probs.to_float32(), expected_log_probs, 0.01); } TEST_P(BiasedDecodingDeviceFPTest, NonZeroTimestepDiverge) { const Device device = GetParam().first; const DataType dtype = GetParam().second; const dim_t vocab_size = 2; const dim_t batch_size = 1; const dim_t beam_size = 1; const float prefix_bias_beta = 0.35; StorageView logits({batch_size * beam_size, 1, vocab_size}, std::vector{4, 6}, device); StorageView expected_log_probs(device); ops::LogSoftMax()(logits, expected_log_probs); StorageView log_probs(dtype, device); const size_t step = 1; const std::vector> beams_diverged_from_prefix = {{true}}; const std::vector batch_offset = {0}; const std::vector> prefix_ids = {{0, 1, 0}}; BiasedDecoder biased_decoder(prefix_bias_beta, prefix_ids); biased_decoder.decode(batch_size, step, batch_offset, beams_diverged_from_prefix, logits.to(dtype), log_probs); expect_storage_eq(log_probs.to_float32(), expected_log_probs, 0.01); } static std::string fp_test_name(::testing::TestParamInfo> param_info) { return dtype_name(param_info.param.second); } INSTANTIATE_TEST_SUITE_P(CPU, BiasedDecodingDeviceFPTest, ::testing::Values(std::make_pair(Device::CPU, DataType::FLOAT32)), fp_test_name); #ifdef CT2_WITH_CUDA INSTANTIATE_TEST_SUITE_P(CUDA, BiasedDecodingDeviceFPTest, ::testing::Values(std::make_pair(Device::CUDA, DataType::FLOAT32), std::make_pair(Device::CUDA, DataType::FLOAT16)), fp_test_name); #endif TEST(TranslatorTest, TranslatePrefixWithLargeBeam) { // Related to issue https://github.com/OpenNMT/CTranslate2/issues/277 // This is an example where appears in the topk of the first unconstrained decoding // step and produces an incorrect hypothesis that dominates others. Translator translator = default_translator(); TranslationOptions options; options.beam_size = 5; const std::vector input = {"أ" ,"و" ,"ل" ,"ي" ,"س" ,"س"}; const std::vector prefix = {"u", "l", "i", "s", "e"}; const auto result = translator.translate_batch({input}, {prefix}, options)[0]; EXPECT_EQ(result.output(), (std::vector{"u", "l", "i", "s", "e", "s"})); } TEST(TranslatorTest, AlternativesFromPrefix) { Translator translator = default_translator(); TranslationOptions options; options.num_hypotheses = 10; options.return_alternatives = true; options.return_attention = true; const std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; const std::vector prefix = {"a", "t"}; const TranslationResult result = translator.translate_batch({input}, {prefix}, options)[0]; ASSERT_EQ(result.num_hypotheses(), options.num_hypotheses); EXPECT_EQ(result.hypotheses[0], (std::vector{"a", "t", "z", "m", "o", "n"})); EXPECT_EQ(result.hypotheses[1], (std::vector{"a", "t", "s", "u", "m", "o", "n"})); // Tokens at the first unconstrained decoding position should be unique. std::vector tokens_at_position; tokens_at_position.reserve(options.num_hypotheses); for (const std::vector& hypothesis : result.hypotheses) tokens_at_position.emplace_back(hypothesis[prefix.size()]); EXPECT_EQ(std::unique(tokens_at_position.begin(), tokens_at_position.end()), tokens_at_position.end()); EXPECT_TRUE(result.has_attention()); EXPECT_EQ(result.attention[0].size(), 6); } TEST(TranslatorTest, AlternativesFromPrefixMinExpansionProb) { Translator translator = default_translator(); TranslationOptions options; options.num_hypotheses = 10; options.return_scores = true; options.return_attention = true; options.return_alternatives = true; options.min_alternative_expansion_prob = 0.001; const std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; const std::vector prefix = {"a", "t"}; const size_t expected_alternatives = 6; const TranslationResult result = translator.translate_batch({input}, {prefix}, options)[0]; EXPECT_EQ(result.hypotheses.size(), expected_alternatives); EXPECT_EQ(result.scores.size(), expected_alternatives); EXPECT_EQ(result.attention.size(), expected_alternatives); } TEST(TranslatorTest, AlternativesFromPrefixBatch) { Translator translator = default_translator(); TranslationOptions options; options.num_hypotheses = 10; options.return_alternatives = true; const std::vector> input = { {"آ", "ز", "ا"}, {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"} }; const std::vector> prefix = {{"a"}, {"a", "t"}}; const auto results = translator.translate_batch(input, prefix, options); ASSERT_EQ(results.size(), 2); ASSERT_EQ(results[0].num_hypotheses(), options.num_hypotheses); EXPECT_EQ(results[0].hypotheses[0], (std::vector{"a", "z", "z", "a"})); EXPECT_EQ(results[0].hypotheses[1], (std::vector{"a", "s", "z", "a"})); ASSERT_EQ(results[1].num_hypotheses(), options.num_hypotheses); EXPECT_EQ(results[1].hypotheses[0], (std::vector{"a", "t", "z", "m", "o", "n"})); EXPECT_EQ(results[1].hypotheses[1], (std::vector{"a", "t", "s", "u", "m", "o", "n"})); } TEST(TranslatorTest, AlternativesFromScratch) { Translator translator = default_translator(); TranslationOptions options; options.num_hypotheses = 10; options.return_alternatives = true; const std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; const TranslationResult result = translator.translate_batch({input}, options)[0]; ASSERT_EQ(result.num_hypotheses(), options.num_hypotheses); EXPECT_EQ(result.hypotheses[0], (std::vector{"a", "t", "z", "m", "o", "n"})); } TEST(TranslatorTest, AlternativesFromScratchBatch) { Translator translator = default_translator(); TranslationOptions options; options.num_hypotheses = 10; options.return_alternatives = true; const std::vector> inputs = { {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}, {"آ", "ز", "ا"} }; const std::vector results = translator.translate_batch(inputs, options); ASSERT_EQ(results.size(), inputs.size()); ASSERT_EQ(results[0].num_hypotheses(), options.num_hypotheses); EXPECT_EQ(results[0].hypotheses[0], (std::vector{"a", "t", "z", "m", "o", "n"})); EXPECT_EQ(results[0].hypotheses[1], (std::vector{"e", "t", "z", "m", "o", "n"})); ASSERT_EQ(results[1].num_hypotheses(), options.num_hypotheses); EXPECT_EQ(results[1].hypotheses[0], (std::vector{"a", "z", "z", "a"})); EXPECT_EQ(results[1].hypotheses[1], (std::vector{"e", "z", "z", "a"})); } TEST(TranslatorTest, AlternativesFromFullTarget) { Translator translator = default_translator(); TranslationOptions options; options.num_hypotheses = 4; options.return_alternatives = true; const std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; const std::vector target = {"a", "t", "z", "m", "o", "n"}; const TranslationResult result = translator.translate_batch({input}, {target}, options)[0]; EXPECT_EQ(result.hypotheses[0], (std::vector{"a", "t", "z", "m", "o", "n", "e"})); } TEST(TranslatorTest, AlternativesMaxDecodingLength) { Translator translator = default_translator(); TranslationOptions options; options.num_hypotheses = 4; options.max_decoding_length = 2; options.return_alternatives = true; options.return_scores = true; options.return_attention = true; const std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; const std::vector> target_samples = { {}, {"a"}, {"a", "t"}, {"a", "t", "z"} }; for (const auto& target : target_samples) { const auto result = translator.translate_batch({input}, {target}, options)[0]; for (size_t i = 0; i < result.num_hypotheses(); ++i) { EXPECT_EQ(result.hypotheses[i].size(), options.max_decoding_length); EXPECT_EQ(result.attention[i].size(), options.max_decoding_length); for (size_t t = 0; t < std::min(target.size(), options.max_decoding_length); ++t) { EXPECT_EQ(result.hypotheses[i][t], target[t]); } if (target.size() < options.max_decoding_length) { EXPECT_NE(result.scores[i], 0); } else { EXPECT_EQ(result.scores[i], 0); } } } } TEST(TranslatorTest, InvalidNumHypotheses) { Translator translator = default_translator(); TranslationOptions options; options.num_hypotheses = 0; std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; EXPECT_THROW(translator.translate_batch({input}, options), std::invalid_argument); } TEST(TranslatorTest, Patience) { Translator translator = default_translator(); TranslationOptions options; options.beam_size = 4; options.patience = 2; options.num_hypotheses = 8; std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; const auto result = translator.translate_batch({input}, options)[0]; EXPECT_EQ(result.num_hypotheses(), options.num_hypotheses); } TEST(TranslatorTest, IgnoreScore) { Translator translator = default_translator(); TranslationOptions options; options.beam_size = 1; options.return_scores = false; const std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; const TranslationResult result = translator.translate_batch({input}, options)[0]; EXPECT_FALSE(result.has_scores()); EXPECT_EQ(result.output(), (std::vector{"a", "t", "z", "m", "o", "n"})); } TEST(TranslatorTest, SameBeamAndGreedyScore) { Translator translator = default_translator(); TranslationOptions options; options.return_scores = true; std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; options.beam_size = 1; const auto greedy_score = translator.translate_batch({input}, options)[0].score(); options.beam_size = 2; const auto beam_score = translator.translate_batch({input}, options)[0].score(); EXPECT_NEAR(greedy_score, beam_score, 1e-5); } TEST(TranslatorTest, BeamSizeLargerThanVocabSize) { Translator translator = default_translator(); TranslationOptions options; // 22*2=44 candidates are retrieved from the model output but the vocabulary size is 42. options.beam_size = 22; options.num_hypotheses = options.beam_size; std::vector input = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; auto result = translator.translate_batch({input}, options)[0]; EXPECT_EQ(result.num_hypotheses(), options.num_hypotheses); } TEST(BufferedTranslationWrapperTest, Basic) { BufferedTranslationWrapper wrapper(std::make_shared(default_model_dir()), /*max_batch_size=*/32, /*batch_timeout_in_micros=*/5000); auto future1 = wrapper.translate_async({"آ", "ز", "ا"}); auto future2 = wrapper.translate_async({"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}); EXPECT_EQ(future1.get().hypotheses[0], (std::vector{"a", "z", "z", "a"})); EXPECT_EQ(future2.get().hypotheses[0], (std::vector{"a", "t", "z", "m", "o", "n"})); } TEST(TranslatorTest, Scoring) { const std::vector> source = { {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}, {"آ" ,"ت" ,"ش" ,"ي" ,"س" ,"و" ,"ن"}, {"آ" ,"ر" ,"ب" ,"ا" ,"ك" ,"ه"}, {}, {"آ" ,"ر" ,"ث" ,"ر"}, }; const std::vector> target = { {"a", "t", "z", "m", "o", "n"}, {"a", "c", "h", "i", "s", "o", "n"}, {"a", "r", "b", "a", "k", "e"}, {}, {"a", "r", "t", "h", "e", "r"}, }; const std::vector> expected_scores = { {-0.106023, -0.065410, -0.056002, -0.447953, -0.230714, -0.092184, -0.063463}, {-0.072660, -0.300309, -0.181187, -0.395671, -0.025631, -0.123466, -0.002034, -0.012639}, {-0.103136, -0.089504, -0.063889, -0.007327, -0.452072, -0.060154, -0.016636}, {0}, {-0.076704, -0.036037, -0.029253, -0.030273, -0.149276, -0.002440, -0.003742}, }; constexpr float abs_diff = 1e-5; Translator translator = default_translator(); const auto scores = translator.score_batch(source, target); ASSERT_EQ(scores.size(), expected_scores.size()); for (size_t i = 0; i < scores.size(); ++i) expect_vector_eq(scores[i].tokens_score, expected_scores[i], abs_diff); } TEST(TranslatorTest, ScoringMaxInputLength) { const std::vector source = {"آ" ,"ت" ,"ز" ,"م" ,"و" ,"ن"}; const std::vector target = {"a", "t", "z", "m", "o", "n"}; ScoringOptions options; options.max_input_length = 4; Translator translator = default_translator(); const auto result = translator.score_batch({source}, {target}, options)[0]; EXPECT_EQ(result.tokens, (std::vector{"a", "t", "z", ""})); EXPECT_EQ(result.tokens_score.size(), options.max_input_length); }