diff --git a/common/custom_data/TFrag3Data.cpp b/common/custom_data/TFrag3Data.cpp
index 12cb53a3dc1..25fc742ba98 100644
--- a/common/custom_data/TFrag3Data.cpp
+++ b/common/custom_data/TFrag3Data.cpp
@@ -587,6 +587,45 @@ void MercModelGroup::serialize(Serializer& ser) {
   ser.from_pod_vector(&vertices);
 }
 
+// Serialize one fragment: vertex range and counts, then the triangle and edge lists.
+void ShadowModelFragment::serialize(Serializer& ser) {
+  ser.from_ptr(&first_vertex);
+  ser.from_ptr(&num_one_bone_vertices);
+  ser.from_ptr(&num_two_bone_vertices);
+  ser.from_pod_vector(&single_tris);
+  ser.from_pod_vector(&double_tris);
+  ser.from_pod_vector(&single_edges);
+  ser.from_pod_vector(&double_edges);
+}
+
+// Serialize a model: name and bone count, then a size-prefixed list of fragments.
+void ShadowModel::serialize(Serializer& ser) {
+  ser.from_str(&name);
+  ser.from_ptr(&max_bones);
+
+  if (ser.is_saving()) {
+    ser.save<size_t>(fragments.size());
+  } else {
+    fragments.resize(ser.load<size_t>());
+  }
+  for (auto& frag : fragments) {
+    frag.serialize(ser);
+  }
+}
+
+// Serialize the shared vertex buffer, then a size-prefixed list of models.
+void ShadowModelGroup::serialize(Serializer& ser) {
+  ser.from_pod_vector(&vertices);
+  if (ser.is_saving()) {
+    ser.save<size_t>(models.size());
+  } else {
+    models.resize(ser.load<size_t>());
+  }
+  for (auto& model : models) {
+    model.serialize(ser);
+  }
+}
+
 void Level::serialize(Serializer& ser) {
   ser.from_ptr(&version);
   if (ser.is_loading() && version != TFRAG3_VERSION) {
@@ -647,9 +686,9 @@ void Level::serialize(Serializer& ser) {
   }
 
   hfrag.serialize(ser);
-
   collision.serialize(ser);
   merc_data.serialize(ser);
+  shadow_data.serialize(ser);
 
   ser.from_ptr(&version2);
   if (ser.is_loading() && version2 != TFRAG3_VERSION) {
@@ -770,6 +809,19 @@ void Hfragment::memory_usage(tfrag3::MemoryUsageTracker* tracker) const {
   tracker->add(MemoryUsageCategory::HFRAG_CORNERS, corners.size() * sizeof(HfragmentCorner));
 }
 
+// Report shadow memory: vertices under SHADOW_VERTS, per-fragment tris/edges under SHADOW_INDEX.
+void ShadowModelGroup::memory_usage(MemoryUsageTracker* tracker) const {
+  tracker->add(SHADOW_VERTS, vertices.size() * sizeof(ShadowVertex));
+  for (const auto& model : models) {
+    for (const auto& frag : model.fragments) {
+      tracker->add(SHADOW_INDEX,
+                   (frag.single_tris.size() + frag.double_tris.size()) * sizeof(ShadowTri));
+      tracker->add(SHADOW_INDEX,
+                   (frag.single_edges.size() + frag.double_edges.size()) * sizeof(ShadowEdge));
+    }
+  }
+}
+
 void Level::memory_usage(MemoryUsageTracker* tracker) const {
   for (const auto& texture : textures) {
     texture.memory_usage(tracker);
@@ -793,6 +845,7 @@ void Level::memory_usage(MemoryUsageTracker* tracker) const {
   hfrag.memory_usage(tracker);
   collision.memory_usage(tracker);
   merc_data.memory_usage(tracker);
+  shadow_data.memory_usage(tracker);
 }
 
 void print_memory_usage(const tfrag3::Level& lev, int uncompressed_data_size) {
@@ -837,8 +890,9 @@ void print_memory_usage(const tfrag3::Level& lev, int uncompressed_data_size) {
       {"hfrag-verts", mem_use.data[tfrag3::MemoryUsageCategory::HFRAG_VERTS]},
       {"hfrag-index", mem_use.data[tfrag3::MemoryUsageCategory::HFRAG_INDEX]},
       {"hfrag-time-of-day", mem_use.data[tfrag3::MemoryUsageCategory::HFRAG_TIME_OF_DAY]},
-      {"hfrag-corners", mem_use.data[tfrag3::MemoryUsageCategory::HFRAG_CORNERS]}
-
+      {"hfrag-corners", mem_use.data[tfrag3::MemoryUsageCategory::HFRAG_CORNERS]},
+      {"shadow-vert", mem_use.data[tfrag3::MemoryUsageCategory::SHADOW_VERTS]},
+      {"shadow-ind", mem_use.data[tfrag3::MemoryUsageCategory::SHADOW_INDEX]},
   };
   for (auto& known : known_categories) {
     total_accounted += known.second;
diff --git a/common/custom_data/Tfrag3Data.h b/common/custom_data/Tfrag3Data.h
index 2dac7cf479b..1a03b60a6d6 100644
--- a/common/custom_data/Tfrag3Data.h
+++ b/common/custom_data/Tfrag3Data.h
@@ -18,7 +18,7 @@ namespace tfrag3 {
 // - if changing any large things (vertices, vis, bvh, colors, textures) update get_memory_usage
 // - if adding a new category to the memory usage, update extract_level to print it.
 
-constexpr int TFRAG3_VERSION = 43;
+constexpr int TFRAG3_VERSION = 44;
 
 enum MemoryUsageCategory {
   TEXTURE,
@@ -66,6 +66,9 @@ enum MemoryUsageCategory {
   HFRAG_TIME_OF_DAY,
   HFRAG_CORNERS,
 
+  SHADOW_VERTS,
+  SHADOW_INDEX,
+
   COLLISION,
 
   NUM_CATEGORIES
@@ -614,7 +617,48 @@ struct MercModelGroup {
   void memory_usage(MemoryUsageTracker* tracker) const;
 };
 
-//
+struct ShadowVertex {
+  float pos[3];
+  float weight;  // 1 for one-bone vertices, strictly between 0 and 1 for two-bone vertices
+  u8 mats[2];    // joint indices; mats[1] is 255 for one-bone vertices
+  u8 flags;      // 0 for the top copy of a vertex, 1 for the bottom copy
+};
+static_assert(sizeof(ShadowVertex) == 20);
+
+struct ShadowTri {
+  u8 verts[3];  // vertex indices, relative to the owning fragment
+};
+
+struct ShadowEdge {
+  u8 ind[2];  // vertex indices of the edge's endpoints
+  u8 tri[2];  // triangles sharing the edge; tri[1] may be 255 (none)
+};
+
+struct ShadowModelFragment {
+  std::vector<ShadowTri> single_tris, double_tris;
+  std::vector<ShadowEdge> single_edges, double_edges;
+  u32 first_vertex = 0;           // index of the fragment's first vertex in the group's buffer
+  u32 num_one_bone_vertices = 0;  // vertices skinned to a single joint
+  u32 num_two_bone_vertices = 0;  // vertices blended between two joints
+  void serialize(Serializer& ser);
+};
+
+struct ShadowModel {
+  static constexpr int kMaxVertices = 254;
+  static constexpr int kMaxTris = 254;
+  std::string name;
+  u32 max_bones = 0;  // joint count taken from the source shadow geometry
+  std::vector<ShadowModelFragment> fragments;
+
+  void serialize(Serializer& ser);
+};
+
+struct ShadowModelGroup {
+  std::vector<ShadowVertex> vertices;  // shared by all models; fragments index via first_vertex
+  std::vector<ShadowModel> models;
+  void serialize(Serializer& ser);
+  void memory_usage(MemoryUsageTracker* tracker) const;
+};
 
 constexpr int TFRAG_GEOS = 3;
 constexpr int TIE_GEOS = 4;
@@ -630,6 +674,7 @@ struct Level {
   Hfragment hfrag;
   CollisionMesh collision;
   MercModelGroup merc_data;
+  ShadowModelGroup shadow_data;
   u16 version2 = TFRAG3_VERSION;
   void serialize(Serializer& ser);
   void memory_usage(MemoryUsageTracker* tracker) const;
diff --git a/decompiler/CMakeLists.txt b/decompiler/CMakeLists.txt
index 7e6f1403d60..a31ea2c3b17 100644
--- a/decompiler/CMakeLists.txt
+++ b/decompiler/CMakeLists.txt
@@ -62,6 +62,7 @@ add_library(
         level_extractor/extract_joint_group.cpp
         level_extractor/extract_level.cpp
         level_extractor/extract_merc.cpp
+        level_extractor/extract_shadow.cpp
         level_extractor/extract_tfrag.cpp
         level_extractor/extract_tie.cpp
         level_extractor/extract_shrub.cpp
diff --git a/decompiler/level_extractor/extract_level.cpp
b/decompiler/level_extractor/extract_level.cpp index 7471920c88c..453002a61cb 100644 --- a/decompiler/level_extractor/extract_level.cpp +++ b/decompiler/level_extractor/extract_level.cpp @@ -3,6 +3,8 @@ #include #include +#include "extract_shadow.h" + #include "common/log/log.h" #include "common/util/FileUtil.h" #include "common/util/SimpleThreadGroup.h" @@ -129,6 +131,7 @@ void extract_art_groups_from_level(const ObjectFileDB& db, extract_merc(ag_file, tex_db, db.dts, tex_remap, level_data, false, db.version(), swapped_info); extract_joint_group(ag_file, db.dts, db.version(), art_group_data); + extract_shadow(ag_file, db.dts, level_data, false, db.version()); } } } diff --git a/decompiler/level_extractor/extract_shadow.cpp b/decompiler/level_extractor/extract_shadow.cpp new file mode 100644 index 00000000000..9460c599c98 --- /dev/null +++ b/decompiler/level_extractor/extract_shadow.cpp @@ -0,0 +1,417 @@ +#include "extract_shadow.h" + +#include "common/log/log.h" +#include "common/util/BitUtils.h" + +#include "decompiler/util/goal_data_reader.h" + +namespace decompiler { + +struct ShadowVertex { + math::Vector3f pos; + float weight; +}; + +struct ShadowRef { + uint8_t joint_0 = 0; + uint8_t joint_1 = 0; +}; + +struct ShadowTri { + uint8_t verts[3]; + uint8_t faces; +}; + +struct ShadowEdge { + uint8_t ind[2]; + uint8_t tri[2]; +}; + +struct ShadowData { + std::string name; + uint32_t num_joints = 0; + std::vector one_bone_vertices; + std::vector two_bone_vertices; + std::vector refs; + std::vector single_tris, double_tris; + std::vector single_edges, double_edges; +}; + +std::string debug_dump_to_ply(const ShadowData& data) { + int num_verts = data.one_bone_vertices.size() + data.two_bone_vertices.size(); + std::string result = fmt::format( + "ply\nformat ascii 1.0\nelement vertex {}\nproperty float x\nproperty float y\nproperty " + "float z\nproperty uchar red\nproperty uchar green\nproperty uchar blue\nelement face " + "{}\nproperty list uchar int 
vertex_index\nend_header\n", + 2 * num_verts, data.single_tris.size() + data.double_tris.size()); + + for (auto& vtx : data.one_bone_vertices) { + result += fmt::format("{} {} {} {} {} {}\n", vtx.pos.x() / 1024.f, vtx.pos.y() / 1024.f, + vtx.pos.z() / 1024.f, 128, 128, 128); + } + for (auto& vtx : data.two_bone_vertices) { + result += fmt::format("{} {} {} {} {} {}\n", vtx.pos.x() / 1024.f, vtx.pos.y() / 1024.f, + vtx.pos.z() / 1024.f, 128, 128, 128); + } + for (auto& vtx : data.one_bone_vertices) { + result += fmt::format("{} {} {} {} {} {}\n", vtx.pos.x() / 1024.f, vtx.pos.y() / 1024.f, + vtx.pos.z() / 1024.f, 128, 256, 128); + } + for (auto& vtx : data.two_bone_vertices) { + result += fmt::format("{} {} {} {} {} {}\n", vtx.pos.x() / 1024.f, vtx.pos.y() / 1024.f, + vtx.pos.z() / 1024.f, 128, 256, 128); + } + + for (auto& face : data.single_tris) { + result += fmt::format("3 {} {} {}\n", face.verts[0], face.verts[1], face.verts[2]); + } + + for (auto& face : data.double_tris) { + result += fmt::format("3 {} {} {}\n", face.verts[0] + num_verts, face.verts[1] + num_verts, + face.verts[2] + num_verts); + } + + return result; +} + +constexpr int kHeaderSize = 48; + +ShadowData extract_shadow_data(const LinkedObjectFile& file, + const DecompilerTypeSystem& dts, + TypedRef header_ref, + const std::string& name, + int size_qwc, + int num_joints) { + ShadowData shadow_data; + + ASSERT(size_qwc < 1024 * 1024); // something reasonable + std::vector data(size_qwc * 16); + Ref shadow_ref = header_ref.ref; + shadow_ref.byte_offset += kHeaderSize; + memcpy_from_plain_data(data.data(), shadow_ref, size_qwc * 16 - kHeaderSize); + + // lg::info("name is {}, has {} joints, size {} bytes", name, + // read_plain_data_field(header_ref, "num-joints", dts), data.size()); + + shadow_data.name = name; + shadow_data.num_joints = num_joints; + + const u32 num_verts = read_plain_data_field(header_ref, "num-verts", dts); + const u32 num_twos = read_plain_data_field(header_ref, "num-twos", 
dts); + ASSERT(num_verts >= num_twos); + const u32 num_ones = num_verts - num_twos; + // lg::info(" vert counts {} {}", num_ones, num_twos); + + const u32 ofs_verts = read_plain_data_field(header_ref, "ofs-verts", dts); + const u32 ofs_refs = read_plain_data_field(header_ref, "ofs-refs", dts); + const u32 ofs_single_tris = read_plain_data_field(header_ref, "ofs-single-tris", dts); + const u32 ofs_single_edges = read_plain_data_field(header_ref, "ofs-single-edges", dts); + const u32 ofs_double_tris = read_plain_data_field(header_ref, "ofs-double-tris", dts); + const u32 ofs_double_edges = read_plain_data_field(header_ref, "ofs-double-edges", dts); + + const u32 num_single_tris = read_plain_data_field(header_ref, "num-single-tris", dts); + const u32 num_single_edges = read_plain_data_field(header_ref, "num-single-edges", dts); + const u32 num_double_tris = read_plain_data_field(header_ref, "num-double-tris", dts); + const u32 num_double_edges = read_plain_data_field(header_ref, "num-double-edges", dts); + + ASSERT(ofs_verts == kHeaderSize); // verts always right after the header + + // lg::info(" offsets {} {} {} {} {} {}", ofs_verts, ofs_refs, ofs_single_tris, ofs_single_edges, + // ofs_double_tris, ofs_double_edges); + + // vertices + ASSERT(ofs_refs - ofs_verts == 16 * num_verts); + shadow_data.one_bone_vertices.resize(num_ones); + memcpy_from_plain_data(shadow_data.one_bone_vertices.data(), shadow_ref, num_ones * 16); + shadow_ref.byte_offset += num_ones * 16; + for (const auto& x : shadow_data.one_bone_vertices) { + ASSERT(x.weight == 1); + } + + shadow_data.two_bone_vertices.resize(num_twos); + memcpy_from_plain_data(shadow_data.two_bone_vertices.data(), shadow_ref, num_twos * 16); + shadow_ref.byte_offset += num_twos * 16; + for (auto x : shadow_data.two_bone_vertices) { + ASSERT(x.weight > 0 && x.weight < 1); + } + + // refs + ASSERT(ofs_single_tris - ofs_refs == align16(num_verts * 2)); + shadow_data.refs.resize(num_verts); + 
memcpy_from_plain_data(shadow_data.refs.data(), shadow_ref, num_verts * 2); + shadow_ref.byte_offset += ofs_single_tris - ofs_refs; + for (size_t i = 0; i < num_verts; i++) { + ASSERT(shadow_data.refs[i].joint_0 < shadow_data.num_joints); + if (i < num_ones) { + ASSERT(shadow_data.refs[i].joint_1 == 255); + } else { + ASSERT(shadow_data.refs[i].joint_1 < shadow_data.num_joints); + ASSERT(shadow_data.refs[i].joint_1 != shadow_data.refs[i].joint_0); + } + } + + // single tris + ASSERT(ofs_single_edges - ofs_single_tris == align16(num_single_tris * 4)); + shadow_data.single_tris.resize(num_single_tris); + memcpy_from_plain_data(shadow_data.single_tris.data(), shadow_ref, num_single_tris * 4); + shadow_ref.byte_offset += ofs_single_edges - ofs_single_tris; + for (auto& tri : shadow_data.single_tris) { + for (auto v : tri.verts) { + ASSERT(v < num_verts); + } + ASSERT(tri.faces == 0); + } + + // single edges + ASSERT(ofs_double_tris - ofs_single_edges == align16(num_single_edges * 4)); + shadow_data.single_edges.resize(num_single_edges); + memcpy_from_plain_data(shadow_data.single_edges.data(), shadow_ref, num_single_edges * 4); + shadow_ref.byte_offset += ofs_double_tris - ofs_single_edges; + for (auto& edge : shadow_data.single_edges) { + for (auto x : edge.ind) { + ASSERT(x < num_verts); + } + ASSERT(edge.tri[0] != 255); + for (auto x : edge.tri) { + ASSERT(x == 255 || x < shadow_data.single_tris.size()); + } + } + + // double tris + ASSERT(ofs_double_edges - ofs_double_tris == align16(num_double_tris * 4)); + shadow_data.double_tris.resize(num_double_tris); + memcpy_from_plain_data(shadow_data.double_tris.data(), shadow_ref, num_double_tris * 4); + shadow_ref.byte_offset += ofs_double_edges - ofs_double_tris; + for (auto& tri : shadow_data.double_tris) { + for (auto v : tri.verts) { + ASSERT(v < num_verts); + } + ASSERT(tri.faces == 0); + } + + // double edges + ASSERT(size_qwc * 16 - ofs_double_edges == align16(num_double_edges * 4)); + 
shadow_data.double_edges.resize(num_double_edges); + memcpy_from_plain_data(shadow_data.double_edges.data(), shadow_ref, num_double_edges * 4); + for (auto& edge : shadow_data.double_edges) { + for (auto x : edge.ind) { + ASSERT(x < num_verts); + } + ASSERT(edge.tri[0] != 255); + for (auto x : edge.tri) { + ASSERT(x == 255 || x < shadow_data.double_tris.size()); + } + } + return shadow_data; +} + +ShadowData extract_jak1_shadow_data(const LinkedObjectFile& file, + const DecompilerTypeSystem& dts, + int geo_word_idx) { + Ref ref; + ref.data = &file; + ref.seg = 0; + ref.byte_offset = geo_word_idx * 4; + auto tr = typed_ref_from_basic(ref, dts); + auto header_ref = TypedRef(get_field_ref(tr, "header", dts), dts.ts.lookup_type("shadow-header")); + u32 size_qwc = read_plain_data_field(header_ref, "qwc-data", dts); + const std::string name = read_string_field(tr, "name", dts, false); + int num_joints = read_plain_data_field(header_ref, "num-joints", dts); + return extract_shadow_data(file, dts, header_ref, name, size_qwc, num_joints); +} + +std::vector extract_jak2_shadow_data(const LinkedObjectFile& file, + const DecompilerTypeSystem& dts, + int geo_word_idx) { + std::vector shadow_datas; + Ref ref; + ref.data = &file; + ref.seg = 0; + ref.byte_offset = geo_word_idx * 4; + auto tr = typed_ref_from_basic(ref, dts); + uint32_t version = read_plain_data_field(tr, "version", dts); + std::string name = read_string_field(tr, "name", dts, false); + + if (version == 0) { + tr.type = dts.ts.lookup_type("shadow-geo-old"); + auto header_ref = + TypedRef(get_field_ref(tr, "header", dts), dts.ts.lookup_type("shadow-frag-header")); + u32 size_qwc = read_plain_data_field(header_ref, "qwc-data", dts); + int num_joints = read_plain_data_field(header_ref, "num-joints", dts); + shadow_datas.push_back(extract_shadow_data(file, dts, header_ref, name, size_qwc, num_joints)); + } else if (version == 1) { + u32 num_joints = read_plain_data_field(tr, "num-joints", dts); + uint32_t 
num_fragments = read_plain_data_field(tr, "num-fragments", dts); + if (num_fragments == 0) { + lg::error("Shadow geo {} with no fragments! Skipping\n", name); + return {}; + } + // lg::info("{} {} fragments", name, num_fragments); + auto frags_ref = + TypedRef(get_field_ref(tr, "frags", dts), dts.ts.lookup_type("shadow-frag-ref")); + for (u32 i = 0; i < num_fragments; i++) { + auto header_ref = TypedRef(deref_label(get_field_ref(frags_ref, "header", dts)), + dts.ts.lookup_type("shadow-frag-header")); + u32 size_qwc = read_plain_data_field(frags_ref, "qwc", dts); + shadow_datas.push_back( + extract_shadow_data(file, dts, header_ref, name, size_qwc, num_joints)); + frags_ref.ref.byte_offset += 8; + } + } else { + lg::die("unknown version {}\n", version); + } + return shadow_datas; +} + +std::vector convert_vertices(const ShadowData& data) { + std::vector result; + + for (size_t i = 0; i < data.one_bone_vertices.size(); i++) { + const auto& in = data.one_bone_vertices[i]; + auto& out = result.emplace_back(); + out.pos[0] = in.pos.x(); + out.pos[1] = in.pos.y(); + out.pos[2] = in.pos.z(); + out.weight = 1.f; + out.mats[0] = data.refs.at(i).joint_0; + out.mats[1] = data.refs.at(i).joint_1; + ASSERT(out.mats[1] == 255); + ASSERT(in.weight == 1.f); + out.flags = 0; + } + + for (size_t i = 0; i < data.two_bone_vertices.size(); i++) { + const auto& in = data.two_bone_vertices[i]; + auto& out = result.emplace_back(); + out.pos[0] = in.pos.x(); + out.pos[1] = in.pos.y(); + out.pos[2] = in.pos.z(); + out.weight = in.weight; + ASSERT(out.weight != 1.f && out.weight != 0.f); + out.mats[0] = data.refs.at(data.one_bone_vertices.size() + i).joint_0; + out.mats[1] = data.refs.at(data.one_bone_vertices.size() + i).joint_1; + ASSERT(out.mats[0] != 255); + ASSERT(out.mats[1] != 255); + out.flags = 0; + } + + return result; +} + +tfrag3::ShadowTri convert_tri(const ShadowTri& tri) { + tfrag3::ShadowTri result; + for (int i = 0; i < 3; i++) { + result.verts[i] = tri.verts[i]; + } + 
return result; +} + +tfrag3::ShadowEdge convert_edge(const ShadowEdge& edge) { + tfrag3::ShadowEdge result; + for (int i = 0; i < 2; i++) { + result.ind[i] = edge.ind[i]; + result.tri[i] = edge.tri[i]; + } + return result; +} + +std::vector convert_tris(const std::vector& tris) { + std::vector result; + result.reserve(tris.size()); + for (auto& tri : tris) { + result.push_back(convert_tri(tri)); + } + return result; +} + +std::vector convert_edges(const std::vector& edges) { + std::vector result; + result.reserve(edges.size()); + for (auto& edge : edges) { + result.push_back(convert_edge(edge)); + } + return result; +} + +void add_data_to_level(tfrag3::ShadowModelGroup& sd, const std::vector& fragments) { + if (fragments.empty()) { + return; + } + auto& model = sd.models.emplace_back(); + model.name = fragments.front().name; + model.max_bones = fragments.front().num_joints; + + for (auto& in_frag : fragments) { + auto& out_frag = model.fragments.emplace_back(); + + out_frag.single_tris = convert_tris(in_frag.single_tris); + out_frag.double_tris = convert_tris(in_frag.double_tris); + out_frag.single_edges = convert_edges(in_frag.single_edges); + out_frag.double_edges = convert_edges(in_frag.double_edges); + + const u32 vertex_offset = sd.vertices.size(); + + out_frag.first_vertex = vertex_offset; + out_frag.num_one_bone_vertices = in_frag.one_bone_vertices.size(); + out_frag.num_two_bone_vertices = in_frag.two_bone_vertices.size(); + ASSERT(out_frag.num_one_bone_vertices + out_frag.num_two_bone_vertices <= + tfrag3::ShadowModel::kMaxVertices); + ASSERT(out_frag.single_tris.size() <= tfrag3::ShadowModel::kMaxTris); + ASSERT(out_frag.double_tris.size() <= tfrag3::ShadowModel::kMaxTris); + + // insert top vertices + auto vertices = convert_vertices(in_frag); + sd.vertices.insert(sd.vertices.end(), vertices.begin(), vertices.end()); + + // bottom vertices + for (auto& v : vertices) { + v.flags = 1; + } + sd.vertices.insert(sd.vertices.end(), vertices.begin(), 
vertices.end()); + } + // if (dump_level) { + // auto file_path = file_util::get_file_path( + // {"debug_out/shadow", fmt::format("{}_{}.ply", ag_data.name_in_dgo, i)}); + // file_util::create_dir_if_needed_for_file(file_path); + // file_util::write_text_file(file_path, debug_dump_to_ply(data)); + // } + // i++; +} + +void extract_shadow(const ObjectFileData& ag_data, + const DecompilerTypeSystem& dts, + tfrag3::Level& out, + bool dump_level, + GameVersion version) { + // hack + // dump_level = true; + + if (dump_level) { + file_util::create_dir_if_needed(file_util::get_file_path({"debug_out/shadow"})); + } + auto& sd = out.shadow_data; + + if (version == GameVersion::Jak1) { + auto geo_locations = find_objects_with_type(ag_data.linked_data, "shadow-geo"); + // if (!geo_locations.empty()) { + // lg::error("{} has {} shadows", ag_data.name_in_dgo, geo_locations.size()); + // } + + for (auto loc : geo_locations) { + const ShadowData data = extract_jak1_shadow_data(ag_data.linked_data, dts, loc); + add_data_to_level(sd, {data}); + } + } else { + // Jak2 has two versions of shadow. The "new" version has multiple fragments. + // Although there is a shadow-geo-old type in GOAL code, it's not actually used in the game + // data: both new and old types simply have shadow-geo type tags. 
+ ASSERT(find_objects_with_type(ag_data.linked_data, "shadow-geo-old").empty()); + + auto geo_locations = find_objects_with_type(ag_data.linked_data, "shadow-geo"); + for (auto loc : geo_locations) { + auto shadow_datas = extract_jak2_shadow_data(ag_data.linked_data, dts, loc); + add_data_to_level(sd, shadow_datas); + } + } +} +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/level_extractor/extract_shadow.h b/decompiler/level_extractor/extract_shadow.h new file mode 100644 index 00000000000..88f15b4de43 --- /dev/null +++ b/decompiler/level_extractor/extract_shadow.h @@ -0,0 +1,14 @@ +#pragma once + +#include "common/custom_data/Tfrag3Data.h" + +#include "decompiler/ObjectFile/ObjectFileDB.h" + +namespace decompiler { + +void extract_shadow(const ObjectFileData& ag_data, + const DecompilerTypeSystem& dts, + tfrag3::Level& out, + bool dump_level, + GameVersion version); +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/util/goal_data_reader.cpp b/decompiler/util/goal_data_reader.cpp index 12777b84954..102b8988c69 100644 --- a/decompiler/util/goal_data_reader.cpp +++ b/decompiler/util/goal_data_reader.cpp @@ -51,7 +51,8 @@ void read_plain_data_field(const TypedRef& object, } } -void memcpy_from_plain_data(u8* dest, const Ref& source, int size_bytes) { +void memcpy_from_plain_data(void* _dest, const Ref& source, int size_bytes) { + u8* dest = (u8*)_dest; const auto& words = source.data->words_by_seg.at(source.seg); for (int byte = 0; byte < size_bytes; byte++) { int byte_in_words = byte + source.byte_offset; diff --git a/decompiler/util/goal_data_reader.h b/decompiler/util/goal_data_reader.h index 8a3ad5339e1..e3aa1a2b269 100644 --- a/decompiler/util/goal_data_reader.h +++ b/decompiler/util/goal_data_reader.h @@ -45,7 +45,7 @@ T read_plain_data_field(const TypedRef& object, return result; } -void memcpy_from_plain_data(u8* dest, const Ref& source, int size_bytes); +void memcpy_from_plain_data(void* dest, const 
Ref& source, int size_bytes); std::vector bytes_from_plain_data(const Ref& source, int size_bytes); decompiler::LinkedWord::Kind get_word_kind_for_field(const TypedRef& object, diff --git a/docs/progress-notes/shadow.md b/docs/progress-notes/shadow.md new file mode 100644 index 00000000000..55e56db7955 --- /dev/null +++ b/docs/progress-notes/shadow.md @@ -0,0 +1,1088 @@ +# Shadow Renderer + +The shadow renderer works by darkening the intersection between the "shadow volume" and the world. There's a clever trick sometimes called "Carmack's Reverse" to accomplish this, but it requires drawing the "shadow volume". + +The game builds the shadow volume mesh in `shadow-cpu.gc`, then submits it to a VU1 renderer. This part is a MIPS2C mess and we want to redo it in C++. + +## Drawing Procedure Jak 1 + +## Setup + +The shadow is not drawn if the `disable-draw` flag is set. + +The `center` stored in `shadow-settings` and `shadow-dcache` have different meanings. + +The `center` in `shadow-settings` is set from `draw-bones-shadow`, which is a joint point. The `center` in `shadow-dcache` is: + +``` +dcache.center = settings.center + settings.dir * settings.dist-to-locus +``` + +There are both top and bottom clipping planes. If the `shdf02` flag is set, the planes in `settings` are treated as global. Otherwise, they are treated as "relative". However, computing the final plane assumes the planes have a y normal: + +``` +(set! (-> dcache plane w) (- (-> settings bot-plane w) (-> settings center y))) +``` + +If `shdf00` is set, the shadow is discarded if the camera is below the plane: +``` + (let ((v1-16 (camera-pos))) + (if (< (+ (* (-> v1-16 x) (-> dcache plane x)) + (* (-> v1-16 y) (-> dcache plane y)) + (* (-> v1-16 z) (-> dcache plane z)) + (-> dcache plane w) + ) + 0.0 + ) + (set! 
s1-0 #t) + ) + ) +``` + +The shadow plane is adjusted (again assuming it has a +y normal) to make sure the shadow center is inside the volume: +``` + (let ((f0-25 (+ (* (-> dcache center x) (-> dcache plane x)) + (* (-> dcache center y) (-> dcache plane y)) + (* (-> dcache center z) (-> dcache plane z)) + ) + ) + ) + (if (< 0.0 (+ f0-25 (-> dcache plane w))) + (set! (-> dcache plane w) (- f0-25)) + ) + ) +``` + +Final setup of dcache: +``` + (set! (-> dcache light-dir quad) (-> settings shadow-dir quad)) + (set! (-> dcache near-plane x) 0.0) + (set! (-> dcache near-plane y) 0.0) + (set! (-> dcache near-plane z) 1.0) + (set! (-> dcache near-plane w) (* -2.0 (-> *math-camera* d))) + (set! (-> dcache dcache-top) (the-as uint (-> dcache data))) +``` + +## Stages + +The stages are: + +- `xform-verts` transform mesh vertices into camera space (no perspective) +- `init-vars` transform settings to camera space +- `calc-dual-verts` project vertices to plane +- `scissor-top` (only executed if shdf03 is set), clip vertices to top plane, if above +- `scissor-edges`, clip vertices to near plane +- `find-facing-single-tris`, set face bit to indicate orientation, cull backward ones +- `find-single-edges`, find edges that, when extruded, should be drawn +- `find-facing-double-tris`, set face bit to indicate orientation. Double-sided tris, so no culling +- `find-double-edges`, find edges to extrude from the double-sided tris +- `add-verts` +- `add-facing-single-tris` +- `add-single-edges` +- `add-double-tris` +- `add-double-edges` + + +## Transform Verts + +This needs access to only the `num-joints` in the header and the bone matrices. + +```asm +L98: + lw v1, 0(a0) ;; v1 = qwc-data + lw a2, 20(a0) ;; a2 = ofs-verts + dsll v1, v1, 4 ;; v1 = 16 * qwc-data + lw t0, 24(a0) ;; t0 = ofs-refs + daddu a2, a2, a0 ;; a2 = verts-in-ptr + lh a3, 8(a0) ;; a3 = num-verts + daddu t0, t0, a0 ;; t0 = refs-ptr + lw t1, 4(a0) ;; t1 = num-joints + daddu v1, a0, v1 ;; v1 = dest-start?? 
+ sw a2, 0(a1) ;; store vtx-table in shadow-dcache + daddiu v1, v1, 144 ;; v1 = dest-start + 144... + or a1, t0, r0 ;; a1 = refs-ptr + lh t0, 10(a0) ;; t0 = num-twos + or a2, a2, r0 ;; no effect + dsubu a3, a3, t0 ;; a3 = num-verts - num-twos + lui t0, 28672 + ori t0, t0, 2608 ;; 0xa30 offset in spad + beq a3, r0, L100 + +;; transform ones +B1: +L99: + daddiu a3, a3, -1 ;; decrement num-ones counter + lbu t0, 0(a1) ;; t0 = ref[0] + lbu t1, 1(a1) ;; t1 = ref[1] + daddiu a1, a1, 2 ;; increment ref + dsll t0, t0, 7 ;; t0 = mat0-idx * 128 + daddu t0, t0, v1 ;; t0 = matrix pointer + lqc2 vf1, 0(t0) ;; load transformation matrix! + lqc2 vf2, 16(t0) + lqc2 vf3, 32(t0) + lqc2 vf4, 48(t0) + lqc2 vf9, 0(a2) ;; load vertex + vmulaw.xyzw acc, vf4, vf0 ;; transform!! + vmaddax.xyzw acc, vf1, vf9 + vmadday.xyzw acc, vf2, vf9 + vmaddz.xyz vf9, vf3, vf9 + sqc2 vf9, 0(a2) ;; store! + daddiu a2, a2, 16 + bne a3, r0, L99 + +B2: +L100: + lh a0, 10(a0) ;; num-twos + beq a0, r0, L102 + sll r0, r0, 0 + +B3: +L101: + daddiu a0, a0, -1 ;; decrement remaining count + lbu t0, 0(a1) ;; load mat0 + lbu a3, 1(a1) ;; load mat1 + dsll t0, t0, 7 ;; mat0_idx * 128 + daddiu a1, a1, 2 ;; increment refs ptr + dsll a3, a3, 7 ;; mat1_idx * 128 + daddu t0, t0, v1 ;; t0 = mat0_ptr + daddu a3, a3, v1 ;; a3 = mat1_ptr + lqc2 vf1, 0(t0) ;; load mat0 + lqc2 vf2, 16(t0) + lqc2 vf3, 32(t0) + lqc2 vf4, 48(t0) + lqc2 vf9, 0(a2) ;; load vertex + lqc2 vf5, 0(a3) ;; load mat1 + lqc2 vf6, 16(a3) + lqc2 vf7, 32(a3) + lqc2 vf8, 48(a3) + vsubw.w vf10, vf0, vf9 ;; vf10.w = 1 - vertex.w + vmulaw.xyzw acc, vf4, vf0 ;; xform 0 to vf10.xyz + vmaddax.xyzw acc, vf1, vf9 + vmadday.xyzw acc, vf2, vf9 + vmaddz.xyz vf10, vf3, vf9 + + vmulaw.xyzw acc, vf8, vf0 ;; xform 1 to vf9.xyz + vmaddax.xyzw acc, vf5, vf9 + vmadday.xyzw acc, vf6, vf9 + vmaddz.xyz vf9, vf7, vf9 + + vmulaw.xyz acc, vf10, vf9 ;; combine + vmaddw.xyz vf9, vf9, vf10 + vaddx.w vf9, vf0, vf0 ;; make sure w = 1. 
+ + sqc2 vf9, 0(a2) + daddiu a2, a2, 16 + bne a0, r0, L101 + sll r0, r0, 0 + +B4: + sll r0, r0, 0 + sll r0, r0, 0 +B5: +L102: + or v0, r0, r0 + jr ra + daddu sp, sp, r0 +``` + +## Init Vars + +This function just transforms light-dir, plane, top-plane, and center into the camera frame. +See details of transformation below. + +- `vf7 = cam_rot[0]` +- `vf8 = cam_rot[1]` +- `vf9 = cam_rot[2]` +- `vf10 = cam_rot[3]` +- `vf1 = light-dir` +- `vf11 = plane` +- `vf12 = top-plane` +- `vf2 = center` + +`vf1`, `vf11`, `vf12` (light-dir, both planes) are rotated by `cam-rot`. +`vf2` (`center`) is transformed by `cam-rot`. + +``` + lw v1, *math-camera*(s7) + or v1, v1, r0 + lqc2 vf7, 364(v1) + lqc2 vf8, 380(v1) + lqc2 vf9, 396(v1) + lqc2 vf10, 412(v1) + lqc2 vf1, 128(a1) + lqc2 vf11, 80(a1) + lqc2 vf12, 96(a1) + lqc2 vf2, 64(a1) + + vmulax.xyzw acc, vf7, vf1 ;; rotate light-dir + vmadday.xyzw acc, vf8, vf1 + vmaddz.xyzw vf1, vf9, vf1 + + vmulax.xyzw acc, vf7, vf11 ;; rotate plane + vmadday.xyzw acc, vf8, vf11 + vmaddz.xyz vf11, vf9, vf11 + + vmulax.xyzw acc, vf7, vf12 ;; rotate top-plane + vmadday.xyzw acc, vf8, vf12 + vmaddz.xyz vf12, vf9, vf12 + + vmul.xyzw vf13, vf10, vf11 ;; vf13 = dot(cam_pos, plane) + + vmulaw.xyzw acc, vf10, vf0 ;; acc = cam_pos + vmaddax.xyzw acc, vf7, vf2 ;; acc = cam_pos + cam_rot_x*center + + vmul.xyzw vf14, vf10, vf12 ;; vf14 = dot(cam_pos, top-plane) + + vsubx.w vf13, vf13, vf13 ;; vf13 = dot(cam_pos, plane) - [0, 0, 0, cam.x*plane.x] + vsubx.w vf14, vf14, vf14 ;; vf14 = dot(cam_pos, top-plane) - [0, 0, 0, cam.x*top-plane.x] + + vmadday.xyzw acc, vf8, vf2 ;; acc = cam_pos + cam_rot_x*center + cam_rot_y*center + vmaddz.xyzw vf2, vf9, vf2 + + vsuby.w vf13, vf13, vf13 + vsuby.w vf14, vf14, vf14 + vsubz.w vf11, vf13, vf13 + vsubz.w vf12, vf14, vf14 + sqc2 vf2, 64(a1) + sqc2 vf1, 128(a1) + sqc2 vf11, 80(a1) + sqc2 vf12, 96(a1) + or v0, r0, r0 + jr ra + daddu sp, sp, r0 +``` + +## Calc Dual Verts +This runs each vertex on program 28. 
It takes two cycles through the program!! +``` + nop | mul.xyzw vf27, vf20, Q N | V1-10 + div Q, vf13.x, vf17.x | sub.xyzw vf19, vf01, vf03 V2-9 | V0-0 + move.xyzw vf23, vf07 | sub.xyzw vf20, vf01, vf04 ?? | V1-0 + nop | sub.xyzw vf21, vf01, vf05 N | V2-0 + move.xyzw vf25, vf09 | sub.xyzw vf22, vf01, vf06 ?? | V3-0 + move.xyzw vf26, vf10 | sub.xyzw vf24, vf08, vf27 ?? | V1-11 + nop | mul.xyzw vf11, vf03, vf02 N | V0-1 + nop | mul.xyz vf15, vf19, vf02 N | V0-2 + div Q, vf14.x, vf18.x | mul.xyzw vf12, vf04, vf02 V3-9 | V1-1 + move.xyzw vf07, vf03 | mul.xyzw vf28, vf28, Q V0-3 | V2-10 + move.xyzw vf08, vf04 | mul.xyz vf16, vf20, vf02 V1-3 | V1-2 + move.xyzw vf09, vf05 | addy.x vf11, vf11, vf11 V2-3 | V0-4 + move.xyzw vf10, vf06 | addy.x vf15, vf15, vf15 V3-3 | V0-5 + nop | sub.xyzw vf25, vf25, vf28 N | V2-11 + nop | addy.x vf12, vf12, vf12 N | V1-4 + nop | mul.xyzw vf29, vf29, Q N | V3-10 + nop | addy.x vf16, vf16, vf16 N | V1-5 + nop | addz.x vf11, vf11, vf11 N | V0-6 + nop | addz.x vf15, vf15, vf15 N | V0-7 + nop | sub.xyzw vf26, vf26, vf29 N | V3-11 + nop | addz.x vf12, vf12, vf12 N | V1-6 + nop | addz.x vf16, vf16, vf16 N | V1-7 + nop | addw.x vf11, vf11, vf11 N | V0-8 + nop | mul.xyzw vf13, vf09, vf02 N | V2-1 + nop | addw.x vf12, vf12, vf12 N | V1-8 + nop | mul.xyz vf17, vf21, vf02 N | V2-2 + nop | mul.xyzw vf14, vf10, vf02 N | V3-1 + div Q, vf11.x, vf15.x | mul.xyz vf18, vf22, vf02 V0-9 | V3-2 + nop | addy.x vf13, vf13, vf13 N | V2-4 + nop | addy.x vf17, vf17, vf17 N | V2-5 + nop | addy.x vf14, vf14, vf14 N | V3-4 + nop | addy.x vf18, vf18, vf18 N | V3-5 + nop | addz.x vf13, vf13, vf13 N | V2-6 + nop | addz.x vf17, vf17, vf17 N | V2-7 + div Q, vf12.x, vf16.x | addz.x vf14, vf14, vf14 V1-9 | V3-6 + nop | mul.xyzw vf19, vf19, Q N | V0-10 + move.xyzw vf28, vf21 | addz.x vf18, vf18, vf18 ~ | V3-7 + move.xyzw vf29, vf22 | addw.x vf13, vf13, vf13 ~ | V2-8 + nop | addw.x vf14, vf14, vf14 :e N | V3-8 + nop | sub.xyzw vf07, vf07, vf19 N | V0-11 +``` + +`vf03`'s path: +- 
0 `sub.xyzw vf19, vf01, vf03` : `vf19 = center - vert` +- 1 `mul.xyzw vf11, vf03, vf02` : `vf11 = dot(vert, plane)` +- 2 `mul.xyz vf15, vf19, vf02` : `vf15 = dot3(center - vert, plane)` +- 3 `move.xyzw vf07, vf03` : `vf07 = vert` +- 4 `addy.x vf11, vf11, vf11` : `vf11.x += vf11.y` +- 5 `addy.x vf15, vf15, vf15` : `vf15.x += vf15.y` +- 6 `addz.x vf11, vf11, vf11` : `vf11.x += vf11.z` +- 7 `addz.x vf15, vf15, vf15` : `vf15.x += vf15.z` +- 8 `addw.x vf11, vf11, vf11` : `vf11.x += vf11.w` +- 9 `div Q, vf11.x, vf15.x` : `Q = dot(vert, plane) / dot3(center - vert, plane)` +- 10 `mul.xyzw vf19, vf19, Q` : `vf19 = (center - vert) * Q` +- 11 `sub.xyzw vf07, vf07, vf19`: `vf07 = vert - (center - vert) * Q` + +This is projecting the vertex onto the plane, along the line through `center` and the vertex! + +``` +L93: + lw v1, 16(a1) ;; v1 = dcache-top + lw a2, 0(a1) ;; a2 = vtx-table + daddiu v1, v1, 15 ;; v1 = dcache-top + 15 + lqc2 vf1, 64(a1) ;; vf1 = center + dsra v1, v1, 4 ;; aligning dcache ptr + lqc2 vf2, 80(a1) ;; vf2 = plane + dsll a3, v1, 4 ;; aligning dcache ptr + lh a0, 8(a0) ;; a0 = num-verts + or v1, a3, r0 ;; v1 = dest-ptr + sw a3, 44(a1) ;; storing ptr-dual-verts + or a2, a2, r0 ;; no effect + beq a0, r0, L97 + sll r0, r0, 0 + +B1: + lq a3, 0(a2) ;; a3 = vtx0 + lq t0, 16(a2) ;; t0 = vtx1 + lq t1, 32(a2) ;; t1 = vtx2 + lq t2, 48(a2) ;; t2 = vtx3 + daddiu a2, a2, 64 ;; inc vtx ptr + qmtc2.i vf3, a3 ;; set vertex to vf3, vf4, vf5, vf6 + qmtc2.ni vf4, t0 + qmtc2.ni vf5, t1 + qmtc2.ni vf6, t2 + vcallms 28 ;; run program 28 + sll r0, r0, 0 + daddiu a0, a0, -4 ;; decrement vertex count by 4. 
+ lq a3, 0(a2) ;; start loading next + blez a0, L95 ;; leftovers loop + lq t0, 16(a2) + +B2: + lq t1, 32(a2) + lq t2, 48(a2) + daddiu a2, a2, 64 + qmtc2.i vf3, a3 + qmtc2.ni vf4, t0 + qmtc2.ni vf5, t1 + qmtc2.ni vf6, t2 +B3: +L94: + vcallms 28 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + qmfc2.i a3, vf23 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sq a3, 0(v1) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + qmfc2.ni a3, vf24 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sq a3, 16(v1) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + qmfc2.ni a3, vf25 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sq a3, 32(v1) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + qmfc2.ni a3, vf26 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sq a3, 48(v1) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + lq a3, 0(a2) + sll r0, r0, 0 + lq t0, 16(a2) + sll r0, r0, 0 + lq t1, 32(a2) + daddiu a0, a0, -4 + lq t2, 48(a2) + daddiu a2, a2, 64 + daddiu v1, v1, 64 + sll r0, r0, 0 + qmtc2.ni vf3, a3 + sll r0, r0, 0 + qmtc2.ni vf4, t0 + sll r0, r0, 0 + qmtc2.ni vf5, t1 + bgtz a0, L94 + qmtc2.ni vf6, t2 + +B4: +L95: + vcallms 68 + sll r0, r0, 0 + vnop + sll r0, r0, 0 + daddiu a2, a0, 3 + qmfc2.i a3, vf23 + daddiu t0, a0, 2 + qmfc2.i t1, vf24 + daddiu t2, a0, 1 + qmfc2.i t3, vf25 + daddiu a0, a0, 4 + qmfc2.i t4, vf26 + beq a2, r0, L96 + sq a3, 0(v1) + +B5: + beq t0, r0, L96 + sq t1, 16(v1) + +B6: + beq t2, r0, L96 + sq t3, 32(v1) + +B7: + sll r0, r0, 0 + sq t4, 48(v1) +B8: +L96: + dsll 
a0, a0, 4 + sll r0, r0, 0 + daddu v1, v1, a0 + sll r0, r0, 0 +B9: +L97: + sw v1, 16(a1) ;; dcache top store + or v0, r0, r0 + jr ra + daddu sp, sp, r0 + + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 +``` + +## Scissor Top +If a vertex is past the top plane, it's projected onto the top plane. It does so by moving along the direction from the vertex to its bottom-plane projection (the dual vertex). + +``` +B0: +L83: + lw a2, 44(a1) ;; a2 = dual verts + lw v1, 0(a1) ;; v1 = normal verts + lqc2 vf3, 96(a1) ;; vf3 = top plane + lh a0, 8(a0) ;; a0 = num-verts + or a1, a2, r0 ;; a1 = dual-verts + or v1, v1, r0 + beq a0, r0, L86 + sll r0, r0, 0 + +B1: +L84: + lqc2 vf1, 0(v1) ;; vf1 = vert + lqc2 vf2, 0(a1) ;; vf2 = dual vert + vsub.xyzw vf4, vf2, vf1 ;; vf4 = dual - orig + vmul.xyzw vf5, vf1, vf3 ;; dot4(vert, top_plane) + vmul.xyz vf6, vf4, vf3 ;; dot3(dual-orig, top_plane) + vaddx.y vf5, vf5, vf5 ;; adds for dots + vaddy.x vf6, vf6, vf6 + vaddz.y vf5, vf5, vf5 + vaddz.x vf6, vf6, vf6 + vaddw.y vf5, vf5, vf5 + qmfc2.i a2, vf5 ;; checking the dot4 to see which side of top plane we're on + bltz a2, L85 + sll r0, r0, 0 + +B2: + vdiv Q, vf5.y, vf6.x ;; we're past the top plane, need to project original vertex. + vwaitq + vmulq.xyzw vf4, vf4, Q + vsub.xyzw vf1, vf1, vf4 + sqc2 vf1, 0(v1) +B3: +L85: + daddiu v1, v1, 16 + daddiu a1, a1, 16 + daddiu a0, a0, -1 + bne a0, r0, L84 + sll r0, r0, 0 + +B4: +L86: + or v0, r0, r0 + jr ra + daddu sp, sp, r0 +``` + +## Scissor Edges + +This function prevents the shadow edges from going through the camera near plane. 
+``` +L87: + lw a3, 44(a1) ;; a3 = dual-verts + lw a2, 0(a1) ;; a2 = verts + lqc2 vf3, 112(a1) ;; vf3 = near plane + lh v1, 8(a0) ;; v1 = num-verts + or a0, a3, r0 ;; a0 = duals + or a1, a2, r0 ;; a1 = verts + beq v1, r0, L92 + sll r0, r0, 0 + +B1: +L88: + lqc2 vf1, 0(a1) ;; vf1 = vert + lqc2 vf2, 0(a0) ;; vf2 = dual vert + vaddw.z vf7, vf1, vf3 ;; vf7.z = vert.z + near_plane.w + vaddw.z vf8, vf2, vf3 ;; vf8.z = dual.z + near_plane.w + vsubz.z vf6, vf1, vf2 ;; vf6.z = vert.z - dual.z + vaddw.z vf5, vf1, vf3 ;; vf5.z = vert.z + near_plane.w (?? again) + + vaddz.y vf7, vf0, vf7 ;; vf7.y = vert.z + near_plane.w + vaddz.y vf8, vf0, vf8 ;; vf8.y = dual.z + near_plane.w + vsub.xyz vf4, vf2, vf1 ;; vf4 = dual - vert + qmfc2.i a2, vf7 ;; a2 = compare of vert + qmfc2.i a3, vf8 ;; a3 = compare of dual + bltz a2, L89 + sll r0, r0, 0 + +B2: + bgtz a3, L91 + sll r0, r0, 0 + +B3: + beq r0, r0, L90 + sll r0, r0, 0 + +B4: +L89: + bltz a3, L91 + sll r0, r0, 0 + +B5: + vdiv Q, vf5.z, vf6.z + vwaitq + vmulq.xyzw vf4, vf4, Q + vnop + vnop + vnop + vadd.xyzw vf1, vf1, vf4 + beq r0, r0, L91 + sqc2 vf1, 0(a1) + +B6: +L90: + vdiv Q, vf5.z, vf6.z + vwaitq + vmulq.xyzw vf4, vf4, Q + vnop + vnop + vnop + vadd.xyzw vf1, vf1, vf4 + beq r0, r0, L91 + sqc2 vf1, 0(a0) + +B7: +L91: + daddiu a1, a1, 16 + daddiu a0, a0, 16 + daddiu v1, v1, -1 + bne v1, r0, L88 + sll r0, r0, 0 + +B8: +L92: + or v0, r0, r0 + jr ra + daddu sp, sp, r0 +``` + +## Find Facing Single Tris + +``` + daddiu sp, sp, -64 + sd ra, 0(sp) + sq s4, 16(sp) + sq s5, 32(sp) + sq gp, 48(sp) + + lw v1, 16(a1) ;; dcache top (so we're writing something out!) 
+ lh t0, 12(a0) ;; t0 = num-single-tris + or a2, v1, r0 + lw a3, 28(a0) + daddu a0, a3, a0 + or a3, a0, r0 ;; a3 = single tris + lqc2 vf2, 64(a1) ;; vf2 = center + lqc2 vf1, 128(a1) ;; vf1 = light-dir + lqc2 vf11, 80(a1) ;; vf11 = plane + lw a0, 0(a1) ;; a0 = vtx-ptr + pextlw a0, a0, a0 ;; a0 = [vtx-ptr, vtx-ptr, vtx-ptr, vtx-ptr] + pextlw a0, a0, a0 + daddiu t0, t0, -4 ;; 4 tris at a time I guess. + addiu t1, r0, 1 ;; t1 = 1 + bltz t0, L78 + daddiu t0, t0, 4 + +B1: + lq t3, 0(a3) + pextub t2, r0, t3 + mfc1 r0, f31 + pextlb t3, r0, t3 + mfc1 r0, f31 + psllh t2, t2, 4 + mfc1 r0, f31 + psllh t4, t3, 4 + mfc1 r0, f31 + pextuh t3, r0, t4 + mfc1 r0, f31 + pextlh t4, r0, t4 + mfc1 r0, f31 + pextuh t7, r0, t2 + mfc1 r0, f31 + pextlh t5, r0, t2 + mfc1 r0, f31 + paddw t6, t4, a0 + mfc1 r0, f31 + pcpyud t4, t6, r0 + lq t2, 0(t6) + paddw t8, t3, a0 + lq t3, 0(t4) + pcpyud t9, t8, r0 + lq t4, 0(t8) + dsra32 t6, t6, 0 + dsra32 t8, t8, 0 + paddw s5, t5, a0 + lq t5, 0(t9) + pcpyud t9, s5, r0 + lq t6, 0(t6) + paddw gp, t7, a0 + lq t7, 0(t8) + pcpyud ra, gp, r0 + lq t8, 0(s5) + dsra32 s5, s5, 0 + dsra32 s4, gp, 0 + lq s5, 0(s5) + lq t9, 0(t9) + lq gp, 0(gp) + lq s4, 0(s4) + lq ra, 0(ra) + qmtc2.ni vf2, t2 + qmtc2.ni vf3, t6 + qmtc2.ni vf4, t3 + qmtc2.ni vf7, t4 + qmtc2.ni vf8, t7 + qmtc2.ni vf9, t5 + qmtc2.ni vf12, t8 + qmtc2.ni vf13, s5 + qmtc2.ni vf14, t9 + qmtc2.ni vf17, gp + qmtc2.ni vf18, s4 + qmtc2.ni vf19, ra +B2: +L73: + lq t3, 16(a3) + daddiu t0, t0, -4 + vcallms 0 + pextub t2, r0, t3 + mfc1 r0, f31 + pextlb t3, r0, t3 + mfc1 r0, f31 + psllh t2, t2, 4 + mfc1 r0, f31 + psllh t4, t3, 4 + mfc1 r0, f31 + pextuh t3, r0, t4 + mfc1 r0, f31 + pextlh t4, r0, t4 + mfc1 r0, f31 + pextuh t7, r0, t2 + mfc1 r0, f31 + pextlh t5, r0, t2 + mfc1 r0, f31 + paddw t6, t4, a0 + mfc1 r0, f31 + pcpyud t4, t6, r0 + lq t2, 0(t6) + paddw t8, t3, a0 + lq t3, 0(t4) + pcpyud t9, t8, r0 + lq t4, 0(t8) + dsra32 t6, t6, 0 + dsra32 t8, t8, 0 + paddw s5, t5, a0 + lq t5, 0(t9) + pcpyud t9, s5, r0 + lq t6, 
0(t6) + paddw gp, t7, a0 + lq t7, 0(t8) + pcpyud ra, gp, r0 + lq t8, 0(s5) + dsra32 s5, s5, 0 + dsra32 s4, gp, 0 + lq s5, 0(s5) + lq t9, 0(t9) + lq gp, 0(gp) + lq s4, 0(s4) + lq ra, 0(ra) + qmtc2.ni vf2, t2 + qmtc2.ni vf3, t6 + qmtc2.ni vf4, t3 + qmtc2.ni vf7, t4 + qmtc2.ni vf8, t7 + qmtc2.ni vf9, t5 + qmtc2.ni vf12, t8 + qmtc2.ni vf13, s5 + qmtc2.ni vf14, t9 + qmtc2.ni vf17, gp + qmtc2.ni vf18, s4 + qmtc2.ni vf19, ra + qmfc2.ni t3, vf22 + qmfc2.ni t4, vf23 + qmfc2.ni t2, vf24 + bgez t3, L74 + qmfc2.ni t3, vf25 + +B3: + sb t1, 3(a3) + sw a3, 0(a2) + daddiu a2, a2, 4 +B4: +L74: + bgez t4, L75 + daddiu a3, a3, 4 + +B5: + sb t1, 3(a3) + sw a3, 0(a2) + daddiu a2, a2, 4 +B6: +L75: + bgez t2, L76 + daddiu a3, a3, 4 + +B7: + sb t1, 3(a3) + sw a3, 0(a2) + daddiu a2, a2, 4 +B8: +L76: + bgez t3, L77 + daddiu a3, a3, 4 + +B9: + sb t1, 3(a3) + sw a3, 0(a2) + daddiu a2, a2, 4 +B10: +L77: + bgtz t0, L73 + daddiu a3, a3, 4 + +B11: +L78: + blez t0, L81 + sll r0, r0, 0 + +B12: +L79: + lbu t2, 0(a3) ;; t2 = ind-0 + lbu t3, 1(a3) ;; t3 = ind-1 + lbu t1, 2(a3) ;; t1 = ind-2 + dsll t2, t2, 4 ;; multiply by 16 + dsll t3, t3, 4 + dsll t1, t1, 4 + daddu t2, t2, a0 ;; offset, get original vertex + daddu t3, t3, a0 + daddu t1, t1, a0 + lqc2 vf2, 0(t2) + lqc2 vf3, 0(t3) + lqc2 vf4, 0(t1) + vsub.xyzw vf5, vf3, vf2 + vsub.xyzw vf6, vf4, vf2 + vopmula.xyz acc, vf5, vf6 + vopmsub.xyz vf5, vf6, vf5 ;; vf5 is the normal + vmul.xyz vf5, vf5, vf1 ;; dot with the light-dir + vaddx.y vf5, vf5, vf5 + vaddz.y vf5, vf5, vf5 + qmfc2.i t1, vf5 + sll r0, r0, 0 + bgez t1, L80 + addiu t1, r0, 1 + +B13: + sw a3, 0(a2) ;; output this triangle (as a pointer to the shadow-tri) + daddiu a2, a2, 4 + sb t1, 3(a3) ;; store a faces = 1 in the tri itself. 
+B14: +L80: + daddiu t0, t0, -1 + bne t0, r0, L79 + daddiu a3, a3, 4 + +B15: +L81: + dsubu a0, a2, v1 + dsra a0, a0, 2 + sw a0, 20(a1) ;; num facing-single-tris + sw v1, 32(a1) ;; single tri list + sw a2, 16(a1) ;; dcache top + or v0, r0, r0 + ld ra, 0(sp) + lq gp, 48(sp) + lq s5, 32(sp) + lq s4, 16(sp) + jr ra + daddiu sp, sp, 64 +``` + +## Find Single Edges + +``` +L66: + lw a2, 16(a1) ;; top + lh a3, 14(a0) ;; a3 = num-single-edges + or v1, a2, r0 ;; v1 = dcache top + lw t0, 32(a0) ;; t0 = ofs-single-edges + beq a3, r0, L71 ;; exit if none + lw t1, 28(a0) ;; t1 = ofs-single-tris + +B1: + daddu t0, t0, a0 ;; t0 = single edge table + sw a2, 36(a1) ;; set single-edge-list + daddu a0, t1, a0 ;; a0 = single tris table + sw t0, 4(a1) ;; set single-edge-table + or t1, t0, r0 ;; t1 = single edges + addiu t2, r0, 255 ;; t2 = 255 + sll r0, r0, 0 +B2: +L67: + daddiu a3, a3, -1 ;; dec counter + lbu t4, 3(t1) ;; t4 = edge.tri-1 + sll r0, r0, 0 + lbu t5, 2(t1) ;; t5 = edge.tri-0 + beq t4, t2, L68 ;; goto L68 if tri-1 is 255. + or t3, r0, r0 ;; t3 = 0 + +B3: ;; case where both tris are set. + dsll t3, t5, 2 ;; t3 = tri-0 + dsll t4, t4, 2 + daddu t3, t3, a0 + daddu t5, t4, a0 ;; t5 = tri-1 + sll r0, r0, 0 + lbu t4, 3(t3) ;; t4 = tri-0.faces + sll r0, r0, 0 + lbu t5, 3(t5) ;; t5 = tri-1.faces + sltiu t3, t4, 1 ;; t3 = (tri-0.faces < 1) = (tri-0.faces == 0) + sll r0, r0, 0 + beq t4, t5, L70 ;; if facing is equal skip this. + sll r0, r0, 0 + +B4: + beq r0, r0, L69 + sll r0, r0, 0 + +B5: +L68: ;; case where tri 1 is 255 + dsll t4, t5, 2 ;; t4 = tri-0 + sll r0, r0, 0 + daddu t4, t4, a0 ;; t4 = tri0 + sll r0, r0, 0 + sll r0, r0, 0 + lbu t4, 3(t4) ;; t4 = tri-0.faces + beq t4, r0, L70 ;; if facing isn't set, skip this. + sll r0, r0, 0 + +B6: +L69: + dsubu t4, t1, t0 ;; t4 = edge idx + sh t3, 2(v1) ;; store (0, or, tri0.faces == 0) + sh t4, 0(v1) ;; store the edge idx. 
+ daddiu v1, v1, 4 +B7: +L70: + bne a3, r0, L67 + daddiu t1, t1, 4 + +B8: +L71: + dsubu a0, v1, a2 + dsra a0, a0, 2 + sw a0, 24(a1) + sw v1, 16(a1) + or v0, r0, r0 + jr ra + daddu sp, sp, r0 +``` + +## OOO add-facing-single-tris + +``` +L41: + lw v1, *shadow-data*(s7) + or a3, v1, r0 ;; a3 = shadow-data + lw v1, 20(a1) ;; v1 = num-facing + lw a0, 32(a1) ;; a0 = single-tri-list + beq v1, r0, L43 + daddiu a1, v1, 1 ;; a1 = num-facing + 1 + +B1: + daddiu a1, a1, 3 ;; a1 += 3 + dsra t0, a1, 2 ;; shift right/left to align + dsll a1, t0, 2 + daddiu t0, t0, 1 ;; add one + ld t1, 80(a3) ;; t1 = dma-cnt + daddu t0, t1, t0 ;; setup dma stuff + lw a3, 92(a3) ;; unpack + sd t0, 0(a2) ;; + addiu t0, r0, 16728 + sw r0, 8(a2) + sw a3, 12(a2) + sb a1, 14(a2) + dsll a1, a1, 2 + sh t0, 12(a2) + daddiu a2, a2, 16 + daddu a1, a2, a1 + sq r0, -16(a1) + sw v1, 0(a2) + daddiu a2, a2, 4 +B2: +L42: + lw a3, 0(a0) + daddiu a0, a0, 4 + lw a3, 0(a3) + daddiu v1, v1, -1 + sw a3, 0(a2) + daddiu a2, a2, 4 + bgtz v1, L42 + sll r0, r0, 0 + +B3: + or v1, a1, r0 + lui a0, 5376 + ori a0, a0, 2 + sq r0, 0(v1) + sw a0, 12(v1) + daddiu a2, v1, 16 +B4: +L43: + or v0, a2, r0 + jr ra + daddu sp, sp, r0 +``` + +## Find Facing Double Tris + +Same as single, but we don't build a list. 
+ +## Find Double Edges + +``` + lw a2, 16(a1) + lh a3, 18(a0) ;; num-double-edges + or v1, a2, r0 + lw t1, 40(a0) + beq a3, r0, L55 + lw t0, 12(a1) + +B1: + daddu a0, t1, a0 + sw a2, 40(a1) + sw a0, 8(a1) + or t1, a0, r0 + addiu t2, r0, 255 +B2: +L52: + daddiu a3, a3, -1 + lbu t3, 3(t1) ;; t3 = tri1 + sll r0, r0, 0 + lbu t4, 2(t1) ;; t4 = tri0 + beq t3, t2, L53 + or t5, r0, r0 + +B3: ;; if tri1 != 255 + dsll t4, t4, 2 + dsll t3, t3, 2 + daddu t4, t4, t0 + daddu t3, t3, t0 + sll r0, r0, 0 + lbu t4, 3(t4) ;; t4 = tri0->face + sll r0, r0, 0 + lbu t3, 3(t3) ;; t3 = tri1->face + beq t4, t3, L54 ;; skip if facing equal + sll r0, r0, 0 + +B4: + sltiu t4, t4, 1 ;; t4 = (tri0->face == 0) + sll r0, r0, 0 + sltu t3, r0, t3 ;; t3 = (tri1->face != 0) + sll r0, r0, 0 + sll r0, r0, 0 + sh t4, 2(v1) ;; flip0 = (tri0->face == 0) + dsubu t4, t1, a0 + sh t3, 6(v1) ;; flip1 = (tri1->face != 0) + sll r0, r0, 0 + sh t4, 0(v1) + sll r0, r0, 0 + sh t4, 4(v1) + beq r0, r0, L54 + daddiu v1, v1, 8 + +B5: +L53: ;; if tri1 == 255 + dsll t3, t4, 2 + sll r0, r0, 0 + daddu t3, t3, t0 + sll r0, r0, 0 + sll r0, r0, 0 + lbu t3, 3(t3) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sltiu t3, t3, 1 + dsubu t4, t1, a0 + sh t3, 2(v1) + sh t4, 0(v1) + daddiu v1, v1, 4 +B6: +L54: + bne a3, r0, L52 + daddiu t1, t1, 4 + +B7: +L55: + dsubu a0, v1, a2 + dsra a0, a0, 2 + sw a0, 28(a1) + sw v1, 16(a1) + or v0, r0, r0 + jr ra + daddu sp, sp, r0 + + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 +``` \ No newline at end of file diff --git a/game/CMakeLists.txt b/game/CMakeLists.txt index ba6da7cce1c..a6ffd03b132 100644 --- a/game/CMakeLists.txt +++ b/game/CMakeLists.txt @@ -55,6 +55,8 @@ set(RUNTIME_SOURCE graphics/opengl_renderer/foreground/Merc2.cpp graphics/opengl_renderer/foreground/Merc2BucketRenderer.cpp graphics/opengl_renderer/foreground/Shadow2.cpp + graphics/opengl_renderer/foreground/Shadow3.cpp + graphics/opengl_renderer/foreground/Shadow3CPU.cpp graphics/opengl_renderer/loader/Loader.cpp 
graphics/opengl_renderer/loader/LoaderStages.cpp graphics/opengl_renderer/ocean/CommonOceanRenderer.cpp diff --git a/game/graphics/opengl_renderer/BucketRenderer.h b/game/graphics/opengl_renderer/BucketRenderer.h index 207f0031800..126e8691092 100644 --- a/game/graphics/opengl_renderer/BucketRenderer.h +++ b/game/graphics/opengl_renderer/BucketRenderer.h @@ -55,6 +55,8 @@ struct SharedRenderState { // including transformation, rotation, perspective math::Vector4f camera_matrix[4]; + math::Vector4f camera_rot[4]; + math::Vector4f perspective[4]; math::Vector4f camera_hvdf_off; math::Vector4f camera_fog; math::Vector4f camera_pos; diff --git a/game/graphics/opengl_renderer/OpenGLRenderer.cpp b/game/graphics/opengl_renderer/OpenGLRenderer.cpp index 5b38618f90f..36992f86b69 100644 --- a/game/graphics/opengl_renderer/OpenGLRenderer.cpp +++ b/game/graphics/opengl_renderer/OpenGLRenderer.cpp @@ -112,6 +112,7 @@ OpenGLRenderer::OpenGLRenderer(std::shared_ptr texture_pool, } m_merc2 = std::make_shared(m_render_state.shaders, anim_slot_array()); + m_shadow3 = std::make_shared(m_render_state.shaders); m_generic2 = std::make_shared(m_render_state.shaders); // initialize all renderers @@ -251,7 +252,7 @@ void OpenGLRenderer::init_bucket_renderers_jak3() { BucketId::MERC_LCOM_SHRUB, m_merc2); // 350 - init_bucket_renderer("shadow", BucketCategory::OTHER, BucketId::SHADOW); + init_bucket_renderer("shadow", BucketCategory::OTHER, BucketId::SHADOW, m_shadow3); // 351 for (int i = 0; i < LEVEL_MAX; i++) { @@ -358,8 +359,8 @@ void OpenGLRenderer::init_bucket_renderers_jak3() { BucketId::GENERIC_SPRITE_3, m_generic2, Generic2::Mode::LIGHTNING); - init_bucket_renderer("shadow2", BucketCategory::OTHER, BucketId::SHADOW2); - init_bucket_renderer("shadow3", BucketCategory::OTHER, BucketId::SHADOW3); + init_bucket_renderer("shadow2", BucketCategory::OTHER, BucketId::SHADOW2, m_shadow3); + init_bucket_renderer("shadow3", BucketCategory::OTHER, BucketId::SHADOW3, m_shadow3); // 575 
init_bucket_renderer("tex-warp", BucketCategory::TEX, BucketId::TEX_WARP, m_texture_animator); @@ -550,7 +551,7 @@ void OpenGLRenderer::init_bucket_renderers_jak2() { init_bucket_renderer("gmerc-lcom-tfrag", BucketCategory::GENERIC, BucketId::GMERC_LCOM_TFRAG, m_generic2, Generic2::Mode::NORMAL); - init_bucket_renderer("shadow", BucketCategory::OTHER, BucketId::SHADOW); + init_bucket_renderer("shadow", BucketCategory::OTHER, BucketId::SHADOW, m_shadow3); // 220 init_bucket_renderer("tex-lcom-pris", BucketCategory::TEX, BucketId::TEX_LCOM_PRIS, m_texture_animator); @@ -570,7 +571,7 @@ void OpenGLRenderer::init_bucket_renderers_jak2() { init_bucket_renderer("tex-all-sprite", BucketCategory::TEX, BucketId::TEX_ALL_SPRITE, m_texture_animator); init_bucket_renderer("particles", BucketCategory::SPRITE, BucketId::PARTICLES); - init_bucket_renderer("shadow2", BucketCategory::OTHER, BucketId::SHADOW2); + init_bucket_renderer("shadow2", BucketCategory::OTHER, BucketId::SHADOW2, m_shadow3); init_bucket_renderer("effects", BucketCategory::OTHER, BucketId::EFFECTS, m_generic2, Generic2::Mode::LIGHTNING); init_bucket_renderer("tex-all-warp", BucketCategory::TEX, @@ -757,8 +758,9 @@ void OpenGLRenderer::init_bucket_renderers_jak1() { init_bucket_renderer("common-alpha-generic", BucketCategory::GENERIC, BucketId::GENERIC_ALPHA, m_generic2, - Generic2::Mode::NORMAL); // 46 - init_bucket_renderer("shadow", BucketCategory::OTHER, BucketId::SHADOW); // 47 + Generic2::Mode::NORMAL); // 46 + init_bucket_renderer("shadow", BucketCategory::OTHER, BucketId::SHADOW, + m_shadow3); // 47 //----------------------- // LEVEL 0 pris texture diff --git a/game/graphics/opengl_renderer/OpenGLRenderer.h b/game/graphics/opengl_renderer/OpenGLRenderer.h index 667fe2fecf0..fdd7e07c505 100644 --- a/game/graphics/opengl_renderer/OpenGLRenderer.h +++ b/game/graphics/opengl_renderer/OpenGLRenderer.h @@ -13,6 +13,7 @@ #include "game/graphics/opengl_renderer/TextureAnimator.h" #include 
"game/graphics/opengl_renderer/foreground/Generic2.h" #include "game/graphics/opengl_renderer/foreground/Merc2.h" +#include "game/graphics/opengl_renderer/foreground/Shadow3.h" #include "game/graphics/opengl_renderer/opengl_utils.h" #include "game/tools/filter_menu/filter_menu.h" #include "game/tools/subtitle_editor/subtitle_editor.h" @@ -112,6 +113,7 @@ class OpenGLRenderer { std::shared_ptr m_merc2; std::shared_ptr m_generic2; + std::shared_ptr m_shadow3; std::shared_ptr m_texture_animator; std::vector> m_bucket_renderers; std::vector m_bucket_categories; diff --git a/game/graphics/opengl_renderer/Profiler.cpp b/game/graphics/opengl_renderer/Profiler.cpp index 69feba4e0e1..7479495fca7 100644 --- a/game/graphics/opengl_renderer/Profiler.cpp +++ b/game/graphics/opengl_renderer/Profiler.cpp @@ -108,7 +108,7 @@ void Profiler::draw_node(ProfilerNode& node, bool expand, int depth, float start bool color_orange = false; ImGui::PushStyleColor(ImGuiCol_Text, color); auto str = - fmt::format("{:20s} {:.2f}ms {:6d} tri {:4d} draw", node.m_name, node.m_stats.duration * 1000, + fmt::format("{:20s} {:.3f}ms {:6d} tri {:4d} draw", node.m_name, node.m_stats.duration * 1000, node.m_stats.triangles, node.m_stats.draw_calls); if (node.m_children.empty()) { ImGui::Text(" %s", str.c_str()); @@ -154,7 +154,7 @@ std::string Profiler::to_string() { void ProfilerNode::to_string_helper(std::string& str, int depth) const { str += - fmt::format("{}{:.2f} ms {:30s}\n", std::string(depth, ' '), m_stats.duration * 1000, m_name); + fmt::format("{}{:.3f} ms {:30s}\n", std::string(depth, ' '), m_stats.duration * 1000, m_name); for (const auto& child : m_children) { child.to_string_helper(str, depth + 1); } diff --git a/game/graphics/opengl_renderer/Shader.cpp b/game/graphics/opengl_renderer/Shader.cpp index 9e18e7e9d24..2d1b14e4912 100644 --- a/game/graphics/opengl_renderer/Shader.cpp +++ b/game/graphics/opengl_renderer/Shader.cpp @@ -132,6 +132,7 @@ ShaderLibrary::ShaderLibrary(GameVersion 
version) { at(ShaderId::HFRAG_MONTAGE) = {"hfrag_montage", version}; at(ShaderId::PLAIN_TEXTURE) = {"plain_texture", version}; at(ShaderId::TIE_WIND) = {"tie_wind", version}; + at(ShaderId::SHADOW3) = {"shadow3", version}; for (auto& shader : m_shaders) { ASSERT_MSG(shader.okay(), "error compiling shader"); diff --git a/game/graphics/opengl_renderer/Shader.h b/game/graphics/opengl_renderer/Shader.h index 99acd7b186c..59e050f096e 100644 --- a/game/graphics/opengl_renderer/Shader.h +++ b/game/graphics/opengl_renderer/Shader.h @@ -65,6 +65,7 @@ enum class ShaderId { HFRAG_MONTAGE = 38, PLAIN_TEXTURE = 39, TIE_WIND = 40, + SHADOW3 = 41, MAX_SHADERS }; diff --git a/game/graphics/opengl_renderer/ShadowRenderer.cpp b/game/graphics/opengl_renderer/ShadowRenderer.cpp index 8f3fa3f6464..00ec163ad4b 100644 --- a/game/graphics/opengl_renderer/ShadowRenderer.cpp +++ b/game/graphics/opengl_renderer/ShadowRenderer.cpp @@ -4,7 +4,8 @@ #include "third-party/imgui/imgui.h" -ShadowRenderer::ShadowRenderer(const std::string& name, int my_id) : BucketRenderer(name, my_id) { +ShadowRenderer::ShadowRenderer(const std::string& name, int my_id, std::shared_ptr shadow3) + : BucketRenderer(name, my_id), m_shadow3(shadow3) { // create OpenGL objects glGenBuffers(1, &m_ogl.vertex_buffer); @@ -35,9 +36,13 @@ ShadowRenderer::ShadowRenderer(const std::string& name, int my_id) : BucketRende } void ShadowRenderer::draw_debug_window() { - ImGui::Checkbox("Volume", &m_debug_draw_volume); - ImGui::Text("Vert: %d, Front: %d, Back: %d\n", m_next_vertex, m_next_front_index, - m_next_back_index); + if (m_using_shadow3) { + m_shadow3->draw_debug_window(); + } else { + ImGui::Checkbox("Volume", &m_debug_draw_volume); + ImGui::Text("Vert: %d, Front: %d, Back: %d\n", m_next_vertex, m_next_front_index, + m_next_back_index); + } } ShadowRenderer::~ShadowRenderer() { @@ -201,6 +206,12 @@ void ShadowRenderer::render(DmaFollower& dma, return; } + m_using_shadow3 = dma.current_tag_vifcode0().kind != 
VifCode::Kind::STCYCL; + if (m_using_shadow3) { + m_shadow3->render_jak1(dma, render_state, prof); + return; + } + { // constants auto constants = dma.read_and_advance(); @@ -315,7 +326,7 @@ void ShadowRenderer::render(DmaFollower& dma, m_color.y() = g / 255.0f; m_color.z() = b / 255.0f; m_color.w() = a / 128.0f; - // fmt::print("rgba: {} {} {} {}\n", r, g, b, a); + // fmt::print("rgba: {}\n", m_color.to_string_aligned()); } else { ASSERT_MSG(false, fmt::format("{} {}", next.vifcode0().print(), next.vifcode1().print())); } diff --git a/game/graphics/opengl_renderer/ShadowRenderer.h b/game/graphics/opengl_renderer/ShadowRenderer.h index 70afcf44da2..32722b91e6f 100644 --- a/game/graphics/opengl_renderer/ShadowRenderer.h +++ b/game/graphics/opengl_renderer/ShadowRenderer.h @@ -2,10 +2,15 @@ #include "game/common/vu.h" #include "game/graphics/opengl_renderer/BucketRenderer.h" +#include "game/graphics/opengl_renderer/foreground/Shadow3.h" +/*! + * Jak 1 shadow renderer. This uses mips2c'd VU1 code and isn't very efficient. + * If it detects PC shadow enabled, it will instead render with Shadow3. 
+ */ class ShadowRenderer : public BucketRenderer { public: - ShadowRenderer(const std::string& name, int my_id); + ShadowRenderer(const std::string& name, int my_id, std::shared_ptr shadow3); ~ShadowRenderer(); void render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) override; void draw_debug_window() override; @@ -127,4 +132,6 @@ class ShadowRenderer : public BucketRenderer { } m_ogl; bool m_debug_draw_volume = false; + std::shared_ptr m_shadow3; + bool m_using_shadow3 = false; }; diff --git a/game/graphics/opengl_renderer/background/background_common.cpp b/game/graphics/opengl_renderer/background/background_common.cpp index b3f9f6ca569..d14723a7771 100644 --- a/game/graphics/opengl_renderer/background/background_common.cpp +++ b/game/graphics/opengl_renderer/background/background_common.cpp @@ -829,6 +829,8 @@ void update_render_state_from_pc_settings(SharedRenderState* state, const TfragP for (int i = 0; i < 4; i++) { state->camera_planes[i] = data.camera.planes[i]; state->camera_matrix[i] = data.camera.camera[i]; + state->camera_rot[i] = data.camera.rot[i]; + state->perspective[i] = data.camera.perspective[i]; } state->camera_pos = data.camera.trans; state->camera_hvdf_off = data.camera.hvdf_off; diff --git a/game/graphics/opengl_renderer/foreground/Shadow2.cpp b/game/graphics/opengl_renderer/foreground/Shadow2.cpp index c8ffcebf97a..acf772728c1 100644 --- a/game/graphics/opengl_renderer/foreground/Shadow2.cpp +++ b/game/graphics/opengl_renderer/foreground/Shadow2.cpp @@ -2,7 +2,8 @@ #include "third-party/imgui/imgui.h" -Shadow2::Shadow2(const std::string& name, int my_id) : BucketRenderer(name, my_id) { +Shadow2::Shadow2(const std::string& name, int my_id, std::shared_ptr shadow3) + : BucketRenderer(name, my_id), m_shadow3(shadow3) { m_vertex_buffer.resize(kMaxVerts); m_front_index_buffer.resize(kMaxInds); m_back_index_buffer.resize(kMaxInds); @@ -53,7 +54,11 @@ void Shadow2::init_shaders(ShaderLibrary& shaders) { } void 
Shadow2::draw_debug_window() { - ImGui::Checkbox("volume", &m_debug_draw_volume); + if (m_using_shadow3) { + m_shadow3->draw_debug_window(); + } else { + ImGui::Checkbox("volume", &m_debug_draw_volume); + } } void Shadow2::reset_buffers() { @@ -71,6 +76,12 @@ void Shadow2::render(DmaFollower& dma, SharedRenderState* render_state, ScopedPr return; } + m_using_shadow3 = dma.current_tag_vifcode0().kind == VifCode::Kind::PC_PORT; + if (m_using_shadow3) { + m_shadow3->render_jak1(dma, render_state, prof); + return; + } + auto maybe_constants = dma.read_and_advance(); if (maybe_constants.size_bytes == 0 && maybe_constants.vif0() == 0 && maybe_constants.vif1() == 0) { diff --git a/game/graphics/opengl_renderer/foreground/Shadow2.h b/game/graphics/opengl_renderer/foreground/Shadow2.h index 08b3a6757fa..e07e420bc28 100644 --- a/game/graphics/opengl_renderer/foreground/Shadow2.h +++ b/game/graphics/opengl_renderer/foreground/Shadow2.h @@ -3,12 +3,13 @@ #include "common/math/Vector.h" #include "game/graphics/opengl_renderer/BucketRenderer.h" +#include "game/graphics/opengl_renderer/foreground/Shadow3.h" class Shadow2 : public BucketRenderer { public: static constexpr int kMaxVerts = 8192 * 3 * 2; static constexpr int kMaxInds = kMaxVerts; - Shadow2(const std::string& name, int my_id); + Shadow2(const std::string& name, int my_id, std::shared_ptr shadow3); ~Shadow2(); void render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) override; void draw_debug_window() override; @@ -94,4 +95,6 @@ class Shadow2 : public BucketRenderer { ScopedProfilerNode& prof, const FrameConstants& constants); u8 m_color[4] = {0, 0, 0, 0}; + std::shared_ptr m_shadow3; + bool m_using_shadow3 = false; }; diff --git a/game/graphics/opengl_renderer/foreground/Shadow3.cpp b/game/graphics/opengl_renderer/foreground/Shadow3.cpp new file mode 100644 index 00000000000..c4afa317d4a --- /dev/null +++ b/game/graphics/opengl_renderer/foreground/Shadow3.cpp @@ -0,0 +1,516 @@ +#include 
"Shadow3.h" + +#include "game/runtime.h" + +Shadow3::Shadow3(ShaderLibrary& shaders) { + glGenVertexArrays(1, &m_opengl.vao); + glBindVertexArray(m_opengl.vao); + + glGenBuffers(1, &m_opengl.indices); + glGenBuffers(1, &m_opengl.debug_verts); + + glGenBuffers(1, &m_opengl.bones_buffer); + glBindBuffer(GL_UNIFORM_BUFFER, m_opengl.bones_buffer); + + GLint val; + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &val); + if (val <= 16) { + m_opengl.buffer_alignment = 1; + } else { + m_opengl.buffer_alignment = val / 16; + if (m_opengl.buffer_alignment * 16 != (u32)val) { + ASSERT_MSG(false, + fmt::format("opengl uniform buffer alignment is {}, which is strange\n", val)); + } + } + { + auto& shader = shaders.at(ShaderId::SHADOW3); + shader.activate(); + auto id = shader.id(); + m_uniforms.camera_rot = glGetUniformLocation(id, "camera_rot"); + m_uniforms.fog_constants = glGetUniformLocation(id, "fog_constants"); + m_uniforms.hvdf_offset = glGetUniformLocation(id, "hvdf_offset"); + m_uniforms.perspective_matrix = glGetUniformLocation(id, "perspective_matrix"); + m_uniforms.debug_color = glGetUniformLocation(id, "debug_color"); + m_uniforms.origin = glGetUniformLocation(id, "origin"); + m_uniforms.top_plane = glGetUniformLocation(id, "top_plane"); + m_uniforms.bottom_plane = glGetUniformLocation(id, "bottom_plane"); + m_uniforms.scissor_top = glGetUniformLocation(id, "scissor_top"); + } + + std::vector temp(MAX_SHADER_BONE_VECTORS * sizeof(math::Vector4f)); + glBufferData(GL_UNIFORM_BUFFER, MAX_SHADER_BONE_VECTORS * sizeof(math::Vector4f), temp.data(), + GL_DYNAMIC_DRAW); + glBindBuffer(GL_UNIFORM_BUFFER, 0); +} + +Shadow3::~Shadow3() { + glDeleteBuffers(1, &m_opengl.bones_buffer); + glDeleteBuffers(1, &m_opengl.indices); + glDeleteBuffers(1, &m_opengl.debug_verts); + glDeleteVertexArrays(1, &m_opengl.vao); +} + +void Shadow3::setup_for_level(SharedRenderState* render_state, const LevelData* level_data) { + glBindVertexArray(m_opengl.vao); + glBindBuffer(GL_ARRAY_BUFFER, 
m_hacks ? m_opengl.debug_verts : level_data->shadow_vertices); + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glEnableVertexAttribArray(2); + glEnableVertexAttribArray(3); + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_GEQUAL); + glDepthMask(GL_FALSE); // no depth writes. + + glVertexAttribPointer(0, // location 0 in the shader + 3, // 3 values per vert + GL_FLOAT, // floats + GL_FALSE, // normalized + sizeof(tfrag3::ShadowVertex), // stride + (void*)offsetof(tfrag3::ShadowVertex, pos) // offset (0) + ); + + glVertexAttribPointer(1, // location 1 in the shader + 1, // 1 value per vert + GL_FLOAT, // floats + GL_FALSE, // normalized + sizeof(tfrag3::ShadowVertex), // stride + (void*)offsetof(tfrag3::ShadowVertex, weight) // offset of weight + ); + + glVertexAttribIPointer(2, // location 2 in the shader + 2, // 2 values per vert + GL_UNSIGNED_BYTE, // u8's + sizeof(tfrag3::ShadowVertex), // stride + (void*)offsetof(tfrag3::ShadowVertex, mats[0]) // offset in array + ); + + glVertexAttribIPointer(3, // location 3 in the shader + 1, // 1 value per vert + GL_UNSIGNED_BYTE, // u8's + sizeof(tfrag3::ShadowVertex), // stride + (void*)offsetof(tfrag3::ShadowVertex, flags) // offset in array + ); +} + +namespace { +void set_uniform(GLint uniform, const math::Vector3f& val) { + glUniform3f(uniform, val.x(), val.y(), val.z()); +} +void set_uniform(GLint uniform, const math::Vector4f& val) { + glUniform4f(uniform, val.x(), val.y(), val.z(), val.w()); +} + +} // namespace + +void Shadow3::draw_model(SharedRenderState* render_state, + ShadowRequest* request, + ScopedProfilerNode& prof) { + for (const auto& frag : request->model.model->fragments) { + ShadowCPUInput input{ + .origin = request->origin, + .top_plane = request->top_plane, + .bottom_plane = request->bottom_plane, + .light_dir = request->light_dir, + .bones = request->bones, + .model = &frag, + .vertices = &request->model.level->level->shadow_data.vertices, + .scissor_top = request->scissor_top, + 
.debug_highlight_tri = m_debug_tri, + }; + calc_shadow_indices(input, 
&m_cpu_workspace, &m_cpu_output); + glBindBuffer(GL_UNIFORM_BUFFER, m_opengl.bones_buffer); + glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_opengl.bones_buffer, + sizeof(math::Vector4f) * request->bone_idx, 128 * 16 * 4); + // const auto* geo = request->model.model; + // printf("draw %s\n", geo->name.c_str()); + + set_uniform(m_uniforms.origin, request->origin); + set_uniform(m_uniforms.top_plane, request->top_plane); + set_uniform(m_uniforms.bottom_plane, request->bottom_plane); + glUniform1i(m_uniforms.scissor_top, request->scissor_top); + + if (m_hacks) { + int num_verts = frag.num_one_bone_vertices + frag.num_two_bone_vertices; + std::vector verts; + for (size_t i = 0; i < num_verts; ++i) { + auto& out = verts.emplace_back(); + out.flags = 255; + out.mats[0] = 255; + out.mats[1] = 255; + out.pos[0] = m_cpu_workspace.vertices[i].x(); + out.pos[1] = m_cpu_workspace.vertices[i].y(); + out.pos[2] = m_cpu_workspace.vertices[i].z(); + out.weight = m_cpu_workspace.vertices[i].w(); + } + + for (size_t i = 0; i < num_verts; ++i) { + auto& out = verts.emplace_back(); + out.flags = 255; + out.mats[0] = 255; + out.mats[1] = 255; + out.pos[0] = m_cpu_workspace.dual_vertices[i].x(); + out.pos[1] = m_cpu_workspace.dual_vertices[i].y(); + out.pos[2] = m_cpu_workspace.dual_vertices[i].z(); + out.weight = m_cpu_workspace.dual_vertices[i].w(); + } + + glEnable(GL_DEPTH_TEST); + glDisable(GL_BLEND); + glDepthFunc(GL_GEQUAL); + glDepthMask(GL_TRUE); + glEnable(GL_CULL_FACE); + glCullFace(m_cull_back ? 
GL_BACK : GL_FRONT); + glBindBuffer(GL_ARRAY_BUFFER, m_opengl.debug_verts); + glBufferData(GL_ARRAY_BUFFER, num_verts * 2 * sizeof(tfrag3::ShadowVertex), verts.data(), + GL_DYNAMIC_DRAW); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_opengl.indices); + set_uniform(m_uniforms.debug_color, math::Vector3f(0.5f, 0.5f, 0.5f)); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, m_cpu_output.num_f0_indices * sizeof(u32), + m_cpu_output.f0_indices, GL_DYNAMIC_DRAW); + glDrawElements(GL_TRIANGLES, m_cpu_output.num_f0_indices, GL_UNSIGNED_INT, nullptr); + set_uniform(m_uniforms.debug_color, math::Vector3f(0.f, 0.f, 0.f)); + glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); + glDrawElements(GL_TRIANGLES, m_cpu_output.num_f0_indices, GL_UNSIGNED_INT, nullptr); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + + glBufferData(GL_ELEMENT_ARRAY_BUFFER, m_cpu_output.num_f1_indices * sizeof(u32), + m_cpu_output.f1_indices, GL_DYNAMIC_DRAW); + set_uniform(m_uniforms.debug_color, math::Vector3f(0.f, 0.f, 0.f)); + glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); + glDrawElements(GL_TRIANGLES, m_cpu_output.num_f1_indices, GL_UNSIGNED_INT, nullptr); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + + glEnable(GL_BLEND); + glBlendEquation(GL_FUNC_ADD); + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ZERO, GL_ONE); + set_uniform(m_uniforms.debug_color, math::Vector3f(0.5f, 0.78f, 0.5f)); + glDrawElements(GL_TRIANGLES, m_cpu_output.num_f1_indices, GL_UNSIGNED_INT, nullptr); + + glBindBuffer(GL_ARRAY_BUFFER, request->model.level->shadow_vertices); + glDisable(GL_CULL_FACE); + + } else { + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_opengl.indices); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, m_cpu_output.num_indices * sizeof(u32), + m_cpu_output.indices, GL_DYNAMIC_DRAW); + // enable stencil! + glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); // no color writes. 
+ glEnable(GL_STENCIL_TEST); + glStencilMask(0xFF); + glEnable(GL_DEPTH_TEST); + glDisable(GL_BLEND); + glDepthFunc(GL_GEQUAL); + + if (false) { + glEnable(GL_CULL_FACE); + glCullFace(GL_BACK); + glStencilFunc(GL_ALWAYS, 0, 0); // always pass stencil + glStencilOp(GL_KEEP, GL_KEEP, GL_INCR); // increment on depth fail + glDrawElements(GL_TRIANGLES, m_cpu_output.num_indices, GL_UNSIGNED_INT, nullptr); + glCullFace(GL_FRONT); + glStencilFunc(GL_ALWAYS, 0, 0); + glStencilOp(GL_KEEP, GL_KEEP, GL_DECR); // decrement on depth pass. + glDrawElements(GL_TRIANGLES, m_cpu_output.num_indices, GL_UNSIGNED_INT, nullptr); + } else { + glEnable(GL_CULL_FACE); + glCullFace(GL_FRONT); + glStencilFunc(GL_ALWAYS, 0, 0); // always pass stencil + glStencilOp(GL_KEEP, GL_INCR, GL_KEEP); // increment on depth fail + glDrawElements(GL_TRIANGLES, m_cpu_output.num_indices, GL_UNSIGNED_INT, nullptr); + glCullFace(GL_BACK); + glStencilFunc(GL_ALWAYS, 0, 0); + glStencilOp(GL_KEEP, GL_DECR, GL_KEEP); // decrement on depth pass. + glDrawElements(GL_TRIANGLES, m_cpu_output.num_indices, GL_UNSIGNED_INT, nullptr); + } + + glDisable(GL_CULL_FACE); + } + } +} + +void Shadow3::finish(SharedRenderState* render_state, ScopedProfilerNode& prof) { + // finally, draw shadow. + if (!m_hacks) { + glDepthMask(GL_FALSE); // no depth writes. 
+ + if (render_state->version == GameVersion::Jak1) { + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_FALSE); + glStencilFunc(GL_NOTEQUAL, 0, 0xFF); + glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP); + glDepthFunc(GL_ALWAYS); + glEnable(GL_BLEND); + glBlendEquation(GL_FUNC_ADD); + glBlendFuncSeparate(GL_DST_COLOR, GL_ZERO, GL_ONE, GL_ZERO); + m_full_screen_draw.draw(m_color, render_state, prof); + + } else { + glStencilFunc(GL_NOTEQUAL, 0, 0xFF); + glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP); + glDepthFunc(GL_ALWAYS); + + glEnable(GL_BLEND); + glBlendFuncSeparate(GL_ONE, GL_ONE, GL_ONE, GL_ZERO); + + bool have_darken = false; + bool have_lighten = false; + bool lighten_channel[3] = {false, false, false}; + bool darken_channel[3] = {false, false, false}; + for (int i = 0; i < 3; i++) { + if (m_color[i] > 128) { + have_lighten = true; + lighten_channel[i] = true; + } else if (m_color[i] < 128) { + have_darken = true; + darken_channel[i] = true; + } + } + + if (have_darken) { + glColorMask(darken_channel[0], darken_channel[1], darken_channel[2], false); + glBlendEquation(GL_FUNC_REVERSE_SUBTRACT); + m_full_screen_draw.draw( + math::Vector4f((m_color[3] - m_color[0]) / 256.f, (m_color[3] - m_color[1]) / 256.f, + (m_color[3] - m_color[2]) / 256.f, 0) * + 0.5f, + render_state, prof); + } + + if (have_lighten) { + glColorMask(lighten_channel[0], lighten_channel[1], lighten_channel[2], false); + glBlendEquation(GL_FUNC_ADD); + m_full_screen_draw.draw( + math::Vector4f((m_color[0] - m_color[3]) / 256.f, (m_color[1] - m_color[3]) / 256.f, + (m_color[2] - m_color[3]) / 256.f, 0) * + 0.5f, + render_state, prof); + } + } + } + + // restore + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glBlendEquation(GL_FUNC_ADD); + glDepthMask(GL_TRUE); + glDisable(GL_STENCIL_TEST); + + glBindVertexArray(0); + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + glBindBuffer(GL_UNIFORM_BUFFER, 0); +} + +void Shadow3::flush_requests(SharedRenderState* render_state, 
ScopedProfilerNode& prof) { + if (m_next_request == 0) { + return; + } + + if (!m_did_first_time_setup) { + first_time_setup(render_state); + m_did_first_time_setup = true; + } + + glBindBuffer(GL_UNIFORM_BUFFER, m_opengl.bones_buffer); + glBufferSubData(GL_UNIFORM_BUFFER, 0, m_next_free_bone_vector * sizeof(math::Vector4f), + m_shader_bone_vector_buffer); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + + for (auto& c : m_level_chains) { + if (!c.head) + continue; + setup_for_level(render_state, c.level); + ShadowRequest* iter = c.head; + while (iter) { + draw_model(render_state, iter, prof); + iter = iter->next; + } + } + + for (auto& c : m_level_chains) { + c.level = nullptr; + c.head = nullptr; + } + m_next_request = 0; + m_next_free_bone_vector = 0; +} + +void Shadow3::first_time_setup(SharedRenderState* render_state) { + glClearStencil(0); + glClear(GL_STENCIL_BUFFER_BIT); + render_state->stencil_dirty = true; + + render_state->shaders[ShaderId::SHADOW3].activate(); + glUniformMatrix4fv(m_uniforms.camera_rot, 1, GL_FALSE, &render_state->camera_rot[0].x()); + glUniformMatrix4fv(m_uniforms.perspective_matrix, 1, GL_FALSE, &render_state->perspective[0].x()); + set_uniform(m_uniforms.fog_constants, render_state->camera_fog); + set_uniform(m_uniforms.hvdf_offset, render_state->camera_hvdf_off); +} + +void Shadow3::draw_debug_window() { + ImGui::Checkbox("hacks", &m_hacks); + ImGui::Checkbox("near_plane", &m_near_plane_hack); + ImGui::Checkbox("back?", &m_cull_back); + ImGui::InputInt("Tri", &m_debug_tri); +} + +void Shadow3::render_jak1(DmaFollower& dma, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + m_did_first_time_setup = false; + while (dma.current_tag_offset() != render_state->next_bucket) { + auto data = dma.read_and_advance(); + + if (data.vifcode0().kind == VifCode::Kind::PC_PORT) { + u32 next = data.data_offset; + while (next) { + Jak1ShadowRequest game_request; + memcpy(&game_request, g_ee_main_mem + next, sizeof(Jak1ShadowRequest)); + next = 
game_request.next; + + char name[128]; + strncpy(name, (const char*)(g_ee_main_mem) + 4 + game_request.geo_name, 128); + name[127] = 0; + // printf(" draw %s\n", name); + + auto model = render_state->loader->get_shadow_model(name); + if (!model) { + // printf(" SKIP: no model data\n"); + continue; + } + + constexpr u32 kCullWhenUnderPlane = 1; + // constexpr u32 kDisableFade = 2; + constexpr u32 kAbsolutePlanes = 4; + constexpr u32 kUpperClip = 8; + // constexpr u32 kFlag4 = 16; + constexpr u32 kDisableDraw = 32; + + if (game_request.settings.flags & kDisableDraw) { + continue; + } + + if (game_request.num_joints * 4 + m_next_free_bone_vector + m_opengl.buffer_alignment >= + MAX_SHADER_BONE_VECTORS) { + flush_requests(render_state, prof); + } + + if (m_next_request == m_requests.size()) { + flush_requests(render_state, prof); + } + + LevelChain* chain = nullptr; + for (auto& c : m_level_chains) { + if (c.level == model->level || !c.level) { + chain = &c; + chain->level = model->level; + break; + } + } + + if (!chain) { + ASSERT_NOT_REACHED(); + } + + // grab the next request and link it to the chain for the level. + auto& request = m_requests[m_next_request++]; + + request.model = *model; + // the origin of "light" for the shadow is found by starting at the "center" point + // (somewhere in the model) and following the shadow direction backward. 
+ request.origin = game_request.settings.center + + game_request.settings.shadow_dir * game_request.settings.dist_to_locus; + request.bones = g_ee_main_mem + game_request.mtx; + request.scissor_top = game_request.settings.flags & kUpperClip; + request.color = game_request.color; + request.dist_to_locus = game_request.settings.dist_to_locus; + m_color = request.color; + // m_color = {0.2, 0.8, 0.2, 1.}; + + // copy bones to buffer + constexpr int in_stride = 8 * 4 * sizeof(float); + constexpr int out_stride = 4 * 4 * sizeof(float); + constexpr int in_offset = 3 * in_stride; + m_next_free_bone_vector += m_opengl.buffer_alignment - 1; + m_next_free_bone_vector /= m_opengl.buffer_alignment; + m_next_free_bone_vector *= m_opengl.buffer_alignment; + request.bone_idx = m_next_free_bone_vector; + for (int i = 0; i < game_request.num_joints; i++) { + memcpy(&m_shader_bone_vector_buffer[m_next_free_bone_vector].x(), + g_ee_main_mem + game_request.mtx + in_offset + i * in_stride, out_stride); + m_next_free_bone_vector += 4; + } + + // the clipping planes for the shadow + request.top_plane = game_request.settings.top_plane; + request.bottom_plane = game_request.settings.bot_plane; + if (!(kAbsolutePlanes & game_request.settings.flags)) { + // in relative planes mode, the height of the plane is adjusted to be relative to the + // height of the center, so the planes move and down with the model + request.top_plane.w() -= game_request.settings.center.y(); + if (m_near_plane_hack) { + request.bottom_plane.w() = 4096; + } + request.bottom_plane.w() -= game_request.settings.center.y(); + } + + // skip drawing if the camera is below the lower clipping plane + if (!m_hacks && (kCullWhenUnderPlane & game_request.settings.flags)) { + if (render_state->camera_pos.xyz().dot(request.bottom_plane.xyz()) + + request.bottom_plane.w() < + 0) { + m_next_request--; + continue; + } + } + + request.next = chain->head; + chain->head = &request; + + // detect if the origin is below the clipping 
plane and if so, move it up. + // the logic for this changed in jak2, to support shadows with negative dist_from_locus + if (render_state->version == GameVersion::Jak1) { + const float dot = request.bottom_plane.xyz().dot(request.origin); + if (dot + request.bottom_plane.w() > 0) { + request.bottom_plane.w() = -dot; + } + } else { + const float bot_offset = request.origin.dot(request.bottom_plane.xyz()); + const float top_offset = request.origin.dot(request.top_plane.xyz()); + if ((request.bottom_plane.w() < bot_offset) && (top_offset < request.top_plane.w())) { + if (request.dist_to_locus > 0) { + request.bottom_plane.w() = -bot_offset; + } else { + request.top_plane.w() = -top_offset; + } + } + } + + const auto& cam_rot = render_state->camera_rot; + + request.light_dir = game_request.settings.shadow_dir; + + // transform to camera frame + auto rotate = [&](const math::Vector3f& in) { + return (cam_rot[0] * in[0] + cam_rot[1] * in[1] + cam_rot[2] * in[2]).xyz(); + }; + + auto transform = [&](const math::Vector3f& in) { + return (cam_rot[0] * in[0] + cam_rot[1] * in[1] + cam_rot[2] * in[2] + cam_rot[3]).xyz(); + }; + + auto rotate_plane = [&](const math::Vector4f& in) { + auto xyz = rotate(in.xyz()); + return math::Vector4f(xyz.x(), xyz.y(), xyz.z(), in.w() - xyz.dot(cam_rot[3].xyz())); + }; + + request.light_dir = rotate(request.light_dir); + request.top_plane = rotate_plane(request.top_plane); + request.bottom_plane = rotate_plane(request.bottom_plane); + request.origin = transform(request.origin); + } + } + } + + flush_requests(render_state, prof); + finish(render_state, prof); +} diff --git a/game/graphics/opengl_renderer/foreground/Shadow3.h b/game/graphics/opengl_renderer/foreground/Shadow3.h new file mode 100644 index 00000000000..49a4b29d7d9 --- /dev/null +++ b/game/graphics/opengl_renderer/foreground/Shadow3.h @@ -0,0 +1,100 @@ +#pragma once +#include "game/graphics/opengl_renderer/BucketRenderer.h" +#include 
"game/graphics/opengl_renderer/foreground/Shadow3CPU.h" +#include "game/graphics/opengl_renderer/opengl_utils.h" + +struct Jak1ShadowSettings { + math::Vector center; + u32 flags; + math::Vector shadow_dir; + float dist_to_locus; + math::Vector4f bot_plane; + math::Vector4f top_plane; + float fade_dist; + float fade_start; + s32 dummy2; + s32 dummy3; +}; +static_assert(sizeof(Jak1ShadowSettings) == 5 * 16); + +struct Jak1ShadowRequest { + u8 dma[16]; + Jak1ShadowSettings settings; + math::Vector4f color; + u32 geo_name; + u32 mtx; + u32 num_joints; + u32 next; +}; + +class Shadow3 { + public: + Shadow3(ShaderLibrary& shaders); + ~Shadow3(); + void render_jak1(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof); + void draw_debug_window(); + + private: + struct ShadowRequest { + ShadowRef model; + math::Vector origin; + math::Vector4f top_plane, bottom_plane; + math::Vector3f light_dir; + float dist_to_locus; + math::Vector4f color; + ShadowRequest* next = nullptr; + const u8* bones = nullptr; + u32 bone_idx = 0; + bool scissor_top = false; + }; + + struct LevelChain { + const LevelData* level = nullptr; + ShadowRequest* head = nullptr; + }; + void flush_requests(SharedRenderState* render_state, ScopedProfilerNode& prof); + void first_time_setup(SharedRenderState* render_state); + void setup_for_level(SharedRenderState* render_state, const LevelData* level_data); + void draw_model(SharedRenderState* render_state, + ShadowRequest* request, + ScopedProfilerNode& prof); + void finish(SharedRenderState* render_state, ScopedProfilerNode& prof); + std::array m_requests; + std::array m_level_chains; + int m_next_request = 0; + + static constexpr int MAX_SHADER_BONE_VECTORS = 1024 * 16; // ?? 
+ math::Vector4f m_shader_bone_vector_buffer[MAX_SHADER_BONE_VECTORS]; + u32 m_next_free_bone_vector = 0; + + struct { + GLuint vao = -1; + GLuint indices = -1; + GLuint debug_verts = -1; + GLuint bones_buffer = -1; + int buffer_alignment = 0; + } m_opengl; + + struct { + GLint hvdf_offset = 0; + GLint fog_constants = 0; + GLint perspective_matrix = 0; + GLint camera_rot = 0; + GLint debug_color = 0; + GLint bottom_plane = 0; + GLint top_plane = 0; + GLint origin = 0; + GLint scissor_top = 0; + } m_uniforms; + bool m_did_first_time_setup = false; + + bool m_hacks = false; + bool m_cull_back = false; + bool m_near_plane_hack = false; + int m_debug_tri = 0; + + ShadowCPUWorkspace m_cpu_workspace; + ShadowCPUOutput m_cpu_output; + FullScreenDraw m_full_screen_draw; + math::Vector4f m_color; +}; \ No newline at end of file diff --git a/game/graphics/opengl_renderer/foreground/Shadow3CPU.cpp b/game/graphics/opengl_renderer/foreground/Shadow3CPU.cpp new file mode 100644 index 00000000000..1a6e443f534 --- /dev/null +++ b/game/graphics/opengl_renderer/foreground/Shadow3CPU.cpp @@ -0,0 +1,290 @@ +#include "Shadow3CPU.h" + +// This file generates indices with correct face orientations for a shadow volume. +// As usual, Naughty Dog has a few tricks. +// You can have double-sided triangles - basically infinitely thin geometry that casts a shadow. +// It is also okay to have holes in your mesh of single-sided triangles as long as no light ray +// would enter or exit your "volume" through the gap first or last. +// (so you should only have gaps for weird internal areas that would never have an effect on the +// final shadow) It is always safe to eliminate triangles that are fully enclosed within another +// closed volume. + +/** + * Since the shadow mesh has skeletal animation, we must compute the vertex positions on the CPU to + * determine the shadow volume. There's no way around this other than geometry shaders. 
+ */ +void transform_vertices(const ShadowCPUInput& input, ShadowCPUWorkspace* work) { + struct Bone { + math::Vector4f mat[4]; + u8 pad[16 * 4]; + }; + static_assert(sizeof(Bone) == 128); + const tfrag3::ShadowVertex* vertex_ptr = &input.vertices->operator[](input.model->first_vertex); + math::Vector4f* out_ptr = work->vertices; + const Bone* first_bone_ptr = (const Bone*)(3 * 8 * 4 * sizeof(float) + input.bones); + + // vertices influenced by one bone + for (int i = 0; i < input.model->num_one_bone_vertices; i++) { + const Bone& bone = first_bone_ptr[vertex_ptr->mats[0]]; + *out_ptr = bone.mat[3] + // + bone.mat[0] * vertex_ptr->pos[0] + // + bone.mat[1] * vertex_ptr->pos[1] + // + bone.mat[2] * vertex_ptr->pos[2]; + vertex_ptr++; + out_ptr++; + } + + // vertices influenced by two bones. + for (int i = 0; i < input.model->num_two_bone_vertices; i++) { + const Bone& bone0 = first_bone_ptr[vertex_ptr->mats[0]]; + math::Vector4f p0 = bone0.mat[3] + // + bone0.mat[0] * vertex_ptr->pos[0] + // + bone0.mat[1] * vertex_ptr->pos[1] + // + bone0.mat[2] * vertex_ptr->pos[2]; + p0 *= vertex_ptr->weight; + const Bone& bone1 = first_bone_ptr[vertex_ptr->mats[1]]; + math::Vector4f p1 = bone1.mat[3] + // + bone1.mat[0] * vertex_ptr->pos[0] + // + bone1.mat[1] * vertex_ptr->pos[1] + // + bone1.mat[2] * vertex_ptr->pos[2]; + p1 *= (1.f - vertex_ptr->weight); + *out_ptr = p0 + p1; + + out_ptr++; + vertex_ptr++; + } +} + +/** + * Compute the projection of each vertex onto the "bottom plane". This is another ND trick. + * Instead of doing the traditional thing of making an infinite volume, they effectively clip + * the mesh against some plane that sits (ideally) slightly below the ground. This avoids the issue + * of casting shadows on the "wrong side of the ground", and reduces fill. 
+ */ +void calc_dual_verts(const ShadowCPUInput& input, ShadowCPUWorkspace* work) { + const int num_verts = input.model->num_one_bone_vertices + input.model->num_two_bone_vertices; + for (int i = 0; i < num_verts; i++) { + math::Vector4f origin(input.origin.x(), input.origin.y(), input.origin.z(), 1.f); + math::Vector4f p = work->vertices[i]; + math::Vector4f offset = origin - p; + math::Vector4f plane = input.bottom_plane; + work->dual_vertices[i] = p - offset * p.dot(plane) / offset.xyz().dot(plane.xyz()); + } +} + +/** + * Another ND trick: clip the mesh against some plane slightly above the ground. I think this just + * reduces fill. + */ +void scissor_top(const ShadowCPUInput& input, ShadowCPUWorkspace* work) { + const int num_verts = input.model->num_one_bone_vertices + input.model->num_two_bone_vertices; + for (int i = 0; i < num_verts; i++) { + auto& original_vertex = work->vertices[i]; + const auto& dual_vertex = work->dual_vertices[i]; + const float above = original_vertex.dot(input.top_plane); + if (above > 0) { + const math::Vector4f offset = dual_vertex - original_vertex; + float scale = above / offset.xyz().dot(input.top_plane.xyz()); + original_vertex -= offset * scale; + } + } +} + +/** + * Clip against the near plane. I'm not sure why this is needed, but it may be another fill-reducing + * trick, or maybe just allows them to avoid scissoring against the near plane in the VU program? + * Either way, it seems like this isn't really needed. + */ +void scissor_edges(const ShadowCPUInput& input, ShadowCPUWorkspace* work) { + // TODO +} + +/** + * Add "cap" triangles. These are either triangles in the original mesh that point toward the light, + * and their projection to the bottom plane. 
+ */ +void find_facing_single_tris(const ShadowCPUInput& input, + ShadowCPUWorkspace* work, + ShadowCPUOutput* output, + const std::vector& tris) { + const int num_verts = input.model->num_one_bone_vertices + input.model->num_two_bone_vertices; + for (size_t i = 0; i < tris.size(); i++) { + const auto& tri = tris[i]; + // recompute normal after transformation: + math::Vector3f v0 = work->vertices[tri.verts[0]].xyz(); + math::Vector3f v1 = work->vertices[tri.verts[1]].xyz(); + math::Vector3f v2 = work->vertices[tri.verts[2]].xyz(); + math::Vector3f n = (v1 - v0).cross(v2 - v0); + + bool highlight = i == input.debug_highlight_tri; + if (n.dot(input.light_dir) < 0.f) { + work->tri_flags[i] = 1; + // facing toward the light, add the triangle as it appears in the original mesh + output->push_index(tri.verts[0], !highlight); + output->push_index(tri.verts[1], !highlight); + output->push_index(tri.verts[2], !highlight); + // and the projection. This triangle has the normal pointing the other way, since it closes + // the volume, so the indices are flipping. + output->push_index(static_cast(tri.verts[0]) + num_verts, !highlight); + output->push_index(static_cast(tri.verts[2]) + num_verts, !highlight); + output->push_index(static_cast(tri.verts[1]) + num_verts, !highlight); + } else { + // facing away from the light. + work->tri_flags[i] = 0; + } + } +} + +/** + * Build walls. A wall will happen on an edge between a facing and non-facing tri. + * Or, if there is no second tri, there will be a wall whenever the first tri is facing. 
+ */ +void find_single_edges(const ShadowCPUInput& input, + ShadowCPUWorkspace* work, + ShadowCPUOutput* output) { + int edge_offset = input.model->num_one_bone_vertices + input.model->num_two_bone_vertices; + for (size_t i = 0; i < input.model->single_edges.size(); i++) { + const auto& e = input.model->single_edges[i]; + const u8 f0 = work->tri_flags[e.tri[0]]; + const auto t1 = e.tri[1]; + + bool flip = false; // set if the edge orientation is backward. + if (t1 == 255) { + if (f0 == 0) { // only one tri, skip if not facing + continue; + } + // if facing, then the edge is already oriented the right way! + } else { + const u8 f1 = work->tri_flags[e.tri[1]]; + if (f0 == f1) { // both tris face the same way - no wall needed. + continue; + } + flip = f0 == 0; // ND convention here for edge direction. + // this is somewhat of an odd convention because it seems like edges on singles + // are backward. oh well. + } + + if (!flip) { + output->push_index(e.ind[0], true); + output->push_index(static_cast(e.ind[0]) + edge_offset, true); + output->push_index(static_cast(e.ind[1]) + edge_offset, true); + + output->push_index(e.ind[0], true); + output->push_index(static_cast(e.ind[1]) + edge_offset, true); + output->push_index(e.ind[1], true); + } else { + output->push_index(e.ind[0], true); + output->push_index(static_cast(e.ind[1]) + edge_offset, true); + output->push_index(static_cast(e.ind[0]) + edge_offset, true); + + output->push_index(e.ind[0], true); + output->push_index(e.ind[1], true); + output->push_index(static_cast(e.ind[1]) + edge_offset, true); + } + } +} +/** + * Add cap triangles for double triangles. One side is always facing! 
+ */ +void find_facing_double_tris(const ShadowCPUInput& input, + ShadowCPUWorkspace* work, + ShadowCPUOutput* output, + const std::vector& tris) { + const int num_verts = input.model->num_one_bone_vertices + input.model->num_two_bone_vertices; + + for (size_t i = 0; i < tris.size(); i++) { + const auto& tri = tris[i]; + math::Vector3f v0 = work->vertices[tri.verts[0]].xyz(); + math::Vector3f v1 = work->vertices[tri.verts[1]].xyz(); + math::Vector3f v2 = work->vertices[tri.verts[2]].xyz(); + math::Vector3f n = (v1 - v0).cross(v2 - v0); + if (n.dot(input.light_dir) < 0.f) { + work->tri_flags[i] = 1; + // treat this as a normal single-sided triangle that is facing. + output->push_index(tri.verts[0], false); + output->push_index(tri.verts[1], false); + output->push_index(tri.verts[2], false); + output->push_index(static_cast(tri.verts[0]) + num_verts, false); + output->push_index(static_cast(tri.verts[2]) + num_verts, false); + output->push_index(static_cast(tri.verts[1]) + num_verts, false); + } else { + work->tri_flags[i] = 0; + // we need to flip vertices to face the light. 
+ output->push_index(tri.verts[0], false); + output->push_index(tri.verts[2], false); + output->push_index(tri.verts[1], false); + output->push_index(static_cast(tri.verts[0]) + num_verts, false); + output->push_index(static_cast(tri.verts[1]) + num_verts, false); + output->push_index(static_cast(tri.verts[2]) + num_verts, false); + } + } +} + +void find_double_edges(const ShadowCPUInput& input, + ShadowCPUWorkspace* work, + ShadowCPUOutput* output) { + int edge_offset = input.model->num_one_bone_vertices + input.model->num_two_bone_vertices; + for (size_t i = 0; i < input.model->double_edges.size(); i++) { + const auto& e = input.model->double_edges[i]; + const u8 f0 = work->tri_flags[e.tri[0]]; + const auto t1 = e.tri[1]; + + auto add = [&](bool flip) { + if (flip) { + output->push_index(e.ind[0], true); + output->push_index(static_cast(e.ind[0]) + edge_offset, true); + output->push_index(static_cast(e.ind[1]) + edge_offset, true); + + output->push_index(e.ind[0], true); + output->push_index(static_cast(e.ind[1]) + edge_offset, true); + output->push_index(e.ind[1], true); + } else { + output->push_index(e.ind[0], true); + output->push_index(static_cast(e.ind[1]) + edge_offset, true); + output->push_index(static_cast(e.ind[0]) + edge_offset, true); + + output->push_index(e.ind[0], true); + output->push_index(e.ind[1], true); + output->push_index(static_cast(e.ind[1]) + edge_offset, true); + } + }; + + if (t1 == 255) { + add(f0 == 1); + } else { + const u8 f1 = work->tri_flags[e.tri[1]]; + ASSERT(f0 != 77); + ASSERT(f1 != 77); + add(f0 == 1); + add(f1 != 1); + } + } +} + +void calc_shadow_indices(const ShadowCPUInput& input, + ShadowCPUWorkspace* work, + ShadowCPUOutput* output) { + output->num_indices = 0; + output->num_f0_indices = 0; + output->num_f1_indices = 0; + + // HACK + for (auto& f : work->tri_flags) { + f = 77; + } + + transform_vertices(input, work); + calc_dual_verts(input, work); + if (input.scissor_top) { + scissor_top(input, work); + } + 
scissor_edges(input, work); + find_facing_single_tris(input, work, output, input.model->single_tris); + find_single_edges(input, work, output); + find_facing_double_tris(input, work, output, input.model->double_tris); + find_double_edges(input, work, output); + + for (int i = 0; i < output->num_indices; i++) { + output->indices[i] += input.model->first_vertex; + } +} diff --git a/game/graphics/opengl_renderer/foreground/Shadow3CPU.h b/game/graphics/opengl_renderer/foreground/Shadow3CPU.h new file mode 100644 index 00000000000..80ffc366614 --- /dev/null +++ b/game/graphics/opengl_renderer/foreground/Shadow3CPU.h @@ -0,0 +1,45 @@ +#pragma once +#include "common/custom_data/Tfrag3Data.h" +#include "common/math/Vector.h" + +struct ShadowCPUInput { + math::Vector3f origin; + math::Vector4f top_plane, bottom_plane; + math::Vector3f light_dir; + const u8* bones = nullptr; + const tfrag3::ShadowModelFragment* model = nullptr; + std::vector* vertices = nullptr; + bool scissor_top = false; + int debug_highlight_tri = 0; +}; + +struct ShadowCPUOutput { + static constexpr int kMaxIndices = (256 * 3 * 2) + (256 * 3 * 2); + + void push_index(u32 i, bool facing) { + indices[num_indices++] = i; + if (!facing) { + f0_indices[num_f0_indices++] = i; + } else { + f1_indices[num_f1_indices++] = i; + } + } + + int num_indices = 0; + u32 indices[kMaxIndices]; + + int num_f0_indices = 0; + u32 f0_indices[kMaxIndices]; + int num_f1_indices = 0; + int f1_indices[kMaxIndices]; +}; + +struct ShadowCPUWorkspace { + math::Vector4f vertices[tfrag3::ShadowModel::kMaxVertices]; + math::Vector4f dual_vertices[tfrag3::ShadowModel::kMaxVertices]; + u8 tri_flags[tfrag3::ShadowModel::kMaxTris]; +}; + +void calc_shadow_indices(const ShadowCPUInput& input, + ShadowCPUWorkspace* work, + ShadowCPUOutput* output); \ No newline at end of file diff --git a/game/graphics/opengl_renderer/loader/Loader.cpp b/game/graphics/opengl_renderer/loader/Loader.cpp index e0d7beddaef..d8fb73d0974 100644 --- 
a/game/graphics/opengl_renderer/loader/Loader.cpp +++ b/game/graphics/opengl_renderer/loader/Loader.cpp @@ -152,6 +152,7 @@ void Loader::draw_debug_window() { lev.second->frames_since_last_used); ImGui::Text(" %d textures", (int)lev.second->textures.size()); ImGui::Text(" %d merc", (int)lev.second->merc_model_lookup.size()); + ImGui::Text(" %d shadow", (int)lev.second->shadow_model_lookup.size()); } ImGui::NewLine(); ImGui::Separator(); @@ -267,14 +268,20 @@ const tfrag3::Level& Loader::load_common(TexturePool& tex_pool, const std::strin Timer tim; MercLoaderStage mls; + ShadowLoaderStage sls; LoaderInput input; input.tex_pool = &tex_pool; input.mercs = &m_all_merc_models; + input.shadows = &m_all_shadow_models; input.lev_data = &m_common_level; bool done = false; while (!done) { done = mls.run(tim, input); } + done = false; + while (!done) { + done = sls.run(tim, input); + } return *m_common_level.level; } @@ -416,6 +423,7 @@ void Loader::update(TexturePool& texture_pool) { LoaderInput loader_input; loader_input.lev_data = lev.get(); loader_input.mercs = &m_all_merc_models; + loader_input.shadows = &m_all_shadow_models; loader_input.tex_pool = &texture_pool; for (auto& stage : m_loader_stages) { @@ -496,6 +504,7 @@ void Loader::update(TexturePool& texture_pool) { m_garbage_buffers.push_back(lev->collide_vertices); m_garbage_buffers.push_back(lev->merc_vertices); m_garbage_buffers.push_back(lev->merc_indices); + m_garbage_buffers.push_back(lev->shadow_vertices); for (auto& model : lev->level->merc_data.models) { auto& mercs = m_all_merc_models.at(model.name); @@ -505,6 +514,14 @@ void Loader::update(TexturePool& texture_pool) { mercs.erase(it); } + for (auto& model : lev->level->shadow_data.models) { + auto& shadows = m_all_shadow_models.at(model.name); + ShadowRef ref{&model, lev->load_id}; + auto it = std::find(shadows.begin(), shadows.end(), ref); + ASSERT_MSG(it != shadows.end(), fmt::format("missing shadow: {}\n", model.name)); + shadows.erase(it); + } + 
m_loaded_tfrag3_levels.erase(*to_unload); } } @@ -544,3 +561,14 @@ std::optional Loader::get_merc_model(const char* model_name) { return std::nullopt; } } + +std::optional Loader::get_shadow_model(const char* model_name) { + // don't think we need to lock here... + const auto& it = m_all_shadow_models.find(model_name); + if (it != m_all_shadow_models.end() && !it->second.empty()) { + // it->second.front().parent_level->frames_since_last_used = 0; + return it->second.front(); + } else { + return std::nullopt; + } +} diff --git a/game/graphics/opengl_renderer/loader/Loader.h b/game/graphics/opengl_renderer/loader/Loader.h index a6fde83998a..dc273e7903f 100644 --- a/game/graphics/opengl_renderer/loader/Loader.h +++ b/game/graphics/opengl_renderer/loader/Loader.h @@ -21,6 +21,7 @@ class Loader { void update_blocking(TexturePool& tex_pool); const LevelData* get_tfrag3_level(const std::string& level_name); std::optional get_merc_model(const char* model_name); + std::optional get_shadow_model(const char* model_name); const tfrag3::Level& load_common(TexturePool& tex_pool, const std::string& name); void set_want_levels(const std::vector& levels); void set_active_levels(const std::vector& levels); @@ -52,6 +53,7 @@ class Loader { std::unordered_map> m_loaded_tfrag3_levels; std::unordered_map> m_all_merc_models; + std::unordered_map> m_all_shadow_models; std::vector m_desired_levels; std::vector m_active_levels; diff --git a/game/graphics/opengl_renderer/loader/LoaderStages.cpp b/game/graphics/opengl_renderer/loader/LoaderStages.cpp index 2a3ff372eb5..31320c97e97 100644 --- a/game/graphics/opengl_renderer/loader/LoaderStages.cpp +++ b/game/graphics/opengl_renderer/loader/LoaderStages.cpp @@ -689,6 +689,47 @@ bool MercLoaderStage::run(Timer& /*timer*/, LoaderInput& data) { return true; } +ShadowLoaderStage::ShadowLoaderStage() : LoaderStage("shadow") {} +void ShadowLoaderStage::reset() { + m_done = false; + m_opengl = false; + m_idx = 0; +} + +bool 
ShadowLoaderStage::run(Timer& /*timer*/, LoaderInput& data) { + if (m_done) { + return true; + } + + if (!m_opengl) { + glGenBuffers(1, &data.lev_data->shadow_vertices); + glBindBuffer(GL_ARRAY_BUFFER, data.lev_data->shadow_vertices); + glBufferData(GL_ARRAY_BUFFER, + data.lev_data->level->shadow_data.vertices.size() * sizeof(tfrag3::ShadowVertex), + nullptr, GL_STATIC_DRAW); + m_opengl = true; + } + + u32 start = m_idx; + m_idx = std::min(start + 32768, (u32)data.lev_data->level->shadow_data.vertices.size()); + glBindBuffer(GL_ARRAY_BUFFER, data.lev_data->shadow_vertices); + glBufferSubData(GL_ARRAY_BUFFER, start * sizeof(tfrag3::ShadowVertex), + (m_idx - start) * sizeof(tfrag3::ShadowVertex), + data.lev_data->level->shadow_data.vertices.data() + start); + + if (m_idx != data.lev_data->level->shadow_data.vertices.size()) { + return false; + } else { + m_done = true; + for (auto& model : data.lev_data->level->shadow_data.models) { + data.lev_data->shadow_model_lookup[model.name] = &model; + (*data.shadows)[model.name].push_back({&model, data.lev_data->load_id, data.lev_data}); + } + return true; + } + return true; +} + std::vector> make_loader_stages() { std::vector> ret; ret.push_back(std::make_unique()); @@ -697,6 +738,7 @@ std::vector> make_loader_stages() { ret.push_back(std::make_unique()); ret.push_back(std::make_unique()); ret.push_back(std::make_unique()); + ret.push_back(std::make_unique()); ret.push_back(std::make_unique()); ret.push_back(std::make_unique()); return ret; diff --git a/game/graphics/opengl_renderer/loader/LoaderStages.h b/game/graphics/opengl_renderer/loader/LoaderStages.h index a2e1f43e41b..efd50d643c7 100644 --- a/game/graphics/opengl_renderer/loader/LoaderStages.h +++ b/game/graphics/opengl_renderer/loader/LoaderStages.h @@ -16,4 +16,16 @@ class MercLoaderStage : public LoaderStage { bool m_opengl = false; bool m_vtx_uploaded = false; u32 m_idx = 0; +}; + +class ShadowLoaderStage : public LoaderStage { + public: + ShadowLoaderStage(); + 
bool run(Timer& timer, LoaderInput& data) override; + void reset() override; + + private: + bool m_done = false; + bool m_opengl = false; + u32 m_idx = 0; }; \ No newline at end of file diff --git a/game/graphics/opengl_renderer/loader/common.h b/game/graphics/opengl_renderer/loader/common.h index 31424bb095a..4e537075469 100644 --- a/game/graphics/opengl_renderer/loader/common.h +++ b/game/graphics/opengl_renderer/loader/common.h @@ -28,6 +28,9 @@ struct LevelData { GLuint merc_indices; std::unordered_map merc_model_lookup; + GLuint shadow_vertices; + std::unordered_map shadow_model_lookup; + GLuint hfrag_vertices; GLuint hfrag_indices; @@ -43,10 +46,20 @@ struct MercRef { } }; +struct ShadowRef { + const tfrag3::ShadowModel* model = nullptr; + u64 load_id = 0; + const LevelData* level = nullptr; + bool operator==(const ShadowRef& other) const { + return model == other.model && load_id == other.load_id; + } +}; + struct LoaderInput { LevelData* lev_data; TexturePool* tex_pool; std::unordered_map>* mercs; + std::unordered_map>* shadows; }; class LoaderStage { diff --git a/game/graphics/opengl_renderer/shaders/shadow3.frag b/game/graphics/opengl_renderer/shaders/shadow3.frag new file mode 100644 index 00000000000..a66dc91093a --- /dev/null +++ b/game/graphics/opengl_renderer/shaders/shadow3.frag @@ -0,0 +1,8 @@ +#version 410 core + +out vec4 color; +in vec4 vtx_color; + +void main() { + color = vtx_color; +} diff --git a/game/graphics/opengl_renderer/shaders/shadow3.vert b/game/graphics/opengl_renderer/shaders/shadow3.vert new file mode 100644 index 00000000000..483eb423976 --- /dev/null +++ b/game/graphics/opengl_renderer/shaders/shadow3.vert @@ -0,0 +1,105 @@ +#version 410 core + +// merc vertex definition +layout (location = 0) in vec3 position_in; +layout (location = 1) in float weight_in; +layout (location = 2) in uvec2 mats; +layout (location = 3) in uint flags; + +// camera control +uniform vec4 hvdf_offset; +uniform vec4 fog_constants; +uniform mat4 
perspective_matrix; +uniform mat4 camera_rot; +uniform vec3 debug_color; +uniform vec4 bottom_plane; +uniform vec4 top_plane; +uniform vec3 origin; +uniform bool scissor_top; + +// output +out vec4 vtx_color; + +struct MercMatrixData { + mat4 X; +}; + +layout (std140) uniform ub_bones { + MercMatrixData bones[128]; +}; + +/* +- 0 `sub.xyzw vf19, vf01, vf03` : `vf19 = center - vert` +- 1 `mul.xyzw vf11, vf03, vf02` : `vf11 = dot(vert, plane)` +- 2 `mul.xyz vf15, vf19, vf02` : `vf15 = dot3(center - vert, plane)` +- 3 `move.xyzw vf07, vf03` : `vf07 = vert` +- 4 `addy.x vf11, vf11, vf11` : `vf11.x += vf11.y` +- 5 `addy.x vf15, vf15, vf15` : `vf15.x += vf15.y` +- 6 `addz.x vf11, vf11, vf11` : `vf11.x += vf11.z` +- 7 `addz.x vf15, vf15, vf15` : `vf15.x += vf15.z` +- 8 `addw.x vf11, vf11, vf11` : `vf11.x += vf11.w` +- 9 `div Q, vf11.x, vf15.x` : `Q = dot(vert, plane) / dot3(center - vert, plane)` +- 10 `mul.xyzw vf19, vf19, Q` : +- 11 `sub.xyzw vf07, vf07, vf19`: +*/ + +vec4 dual(vec4 p, vec4 plane) { + vec4 offset = vec4(origin, 1) - p; + return p - offset * dot(p, plane) / dot(offset.xyz, plane.xyz); +} + +vec4 scissor(vec4 p, vec4 plane) { + float plane_offset = dot(p, plane); + if (plane_offset > 0) { + vec4 offset = vec4(origin, 1) - p; + return p - offset * plane_offset / dot(offset.xyz, plane.xyz); + } else { + return p; + } +} + +void main() { + float w_debug = 7; + vec4 p = vec4(position_in, 1); + vec4 vtx_pos; + + if (mats[0] == 255) { + // debug hack! 
+ vtx_pos = vec4(position_in, weight_in); + } else { + vtx_pos = bones[mats[0]].X * p * weight_in; + + if (weight_in < 1) { + vtx_pos += bones[mats[1]].X * p * (1.f - weight_in); + } + + w_debug = vtx_pos.w; + if ((flags & uint(1)) != 0) { + vtx_pos = dual(vtx_pos, bottom_plane); + } else { + if (scissor_top) { + vtx_pos = scissor(vtx_pos, top_plane); + } + } + } + + + + vec4 transformed = perspective_matrix * -vtx_pos; + + float Q = fog_constants.x / transformed[3]; + + transformed.xyz *= Q; + transformed.xyz += hvdf_offset.xyz; + transformed.xy -= (2048.); + transformed.z /= (8388608); + transformed.z -= 1; + transformed.x /= (256); + transformed.y /= -(128); + transformed.xyz *= transformed.w; + transformed.y *= SCISSOR_ADJUST * HEIGHT_SCALE; + gl_Position = transformed; + + + vtx_color = vec4(debug_color, w_debug); +} diff --git a/goal_src/jak1/engine/draw/drawable.gc b/goal_src/jak1/engine/draw/drawable.gc index 381192d523c..59e226ca730 100644 --- a/goal_src/jak1/engine/draw/drawable.gc +++ b/goal_src/jak1/engine/draw/drawable.gc @@ -718,7 +718,10 @@ (&- (-> *display* frames (-> *display* on-screen) frame global-buf base) (the-as uint s4-1))) (set! (-> a0-26 data 86 total) (-> a0-26 data 86 used))))))) (when #t - (let ((v1-41 *shadow-queue*)) (+! (-> v1-41 cur-run) 1))) + (let ((v1-41 *shadow-queue*)) (+! (-> v1-41 cur-run) 1)) + ;; og:preserve-this + (+! (-> *pc-shadow-queue* cur-run) 1) + ) 0 (none)) @@ -856,7 +859,10 @@ (with-profiler "merc" (set! (-> *merc-global-array* count) (the-as uint 0)) (set! *merc-globals* (the-as merc-globals (-> *merc-global-array* globals))) - (set! (-> *shadow-queue* cur-run) (the-as uint 0))) + (set! (-> *shadow-queue* cur-run) (the-as uint 0)) + ;; og:preserve-this + (set! (-> *pc-shadow-queue* cur-run) 0) + ) ;; draw the background! 
(with-profiler "background" (init-background) diff --git a/goal_src/jak1/engine/gfx/foreground/bones.gc b/goal_src/jak1/engine/gfx/foreground/bones.gc index cadfe3ed59f..7ef71dfc91d 100644 --- a/goal_src/jak1/engine/gfx/foreground/bones.gc +++ b/goal_src/jak1/engine/gfx/foreground/bones.gc @@ -27,6 +27,27 @@ (defglobalconstant BACKWARD_COMPAT_MERC_CLIP #f) +;; when set, render some environment mapped stuff with jak 2's emerc. +;; this is much faster, and does significantly speed up the game thread on finalboss. +(define *emerc-hack* #t) + +;; when set, use merc for blerc instead of generic. +(define *blerc-hack* #t) + +;; when true, uses the PC float blerc implementation. +(define *use-fp-blerc* #t) + +(define *texscroll-force-generic* #f) + +(define *ripple-force-generic* #f) + +;; when set, use the rewritten PC shadow render (faster) +(define *use-pc-shadow* #t) + +;; use rewritten bones math (GOAL asm instead of mips2c) +(define *use-new-bones* #t) + + ;;;;;;;;;;;;;;;;;; ;; calc list ;;;;;;;;;;;;;;;;;; @@ -326,6 +347,10 @@ (let ((v1-13 (-> gp-1 run (-> gp-1 cur-run)))) (set! (-> v1-13 first) (the-as dma-packet 0)) (set! (-> v1-13 next) (the-as (pointer dma-packet) 0)))) + ;; og:preserve-this + (set! (-> *pc-shadow-queue* run (-> *pc-shadow-queue* cur-run) first) (the pc-shadow-request 0)) + (set! (-> *pc-shadow-queue* run (-> *pc-shadow-queue* cur-run) next) (the (pointer pc-shadow-request) 0)) + 0 (none)) @@ -464,7 +489,6 @@ (.svf (&-> out n-mtx quad 2) nmat2)))) (none)) -(define *use-new-bones* #t) (defun bones-mtx-calc-execute () "Do all pending bone calculations" @@ -660,7 +684,84 @@ (new 'static 'plane :y 1.0 :w 4096.0) :fade-dist 409600.0)) +;; og:preserve-this +(defun pc-draw-bones-shadow ((dc draw-control) (mtx pointer) (dma-ptr pointer)) + "Add shadows for this draw-control to the *pc-shadow-queue* to be drawn in pc-shadow-execute-all. 
+ This places a pc-shadow-request in the DMA buffer and adds it to the linked list of requests + for the currently selected run in *pc-shadow-queue*" + (let* ((pse (the pc-shadow-request dma-ptr)) + (sgeo (-> dc shadow)) + (settings (if (-> dc shadow-ctrl) (-> dc shadow-ctrl settings) *default-shadow-settings*)) + (flags (-> settings flags)) + ) + + ;; if fade is enabled, and we're all the way faded out, disable draw + (when (not (logtest? flags (shadow-flags disable-fade))) + (let ((dist (-> (scratchpad-object terrain-context) work foreground bone-mem work distance w))) + (if (< (-> settings fade-dist) dist) + (logior! flags (shadow-flags disable-draw)) + ) + ) + ) + + ;; if disabled, early return + (if (logtest? flags (shadow-flags disable-draw)) + (return dma-ptr) + ) + + + ;; settings + (mem-copy! (the pointer (-> pse settings)) (the pointer settings) (size-of shadow-settings)) + + ;; update the "center" position. + (let ((center-pos (-> dc skeleton bones (-> dc shadow-joint-index) position))) + (set! (-> pse settings center x) (-> center-pos x)) + (set! (-> pse settings center y) (-> center-pos y)) + (set! (-> pse settings center z) (-> center-pos z)) + ) + + ;; set the other properties + (set! (-> pse geo-name) (-> sgeo name)) + (set! (-> pse mtx) mtx) + (set! (-> pse num-joints) (-> sgeo header num-joints)) + (vector-copy! (-> pse color) (-> *time-of-day-context* current-shadow-color)) + + ;; set up linked list. + (let* ((run (-> *pc-shadow-queue* run (-> *pc-shadow-queue* cur-run))) + (next (-> run next)) + ) + + ;; if we're the first in the list, store in the run + (if (zero? (-> run first)) (set! (-> run first) pse)) + + ;; patch next pointer of previous + (if (nonzero? next) (set! (-> next 0) pse)) + + ;; remember where to patch for the next one + (set! (-> run next) (&-> pse next)) + + ;; clear our next pointer in case we're last + (set! (-> pse next) (the pc-shadow-request 0)) + + ;; set up next tag at the start, to skip over this data. 
+ ;; this is a bit of a hack, this function gets called when building merc chains, + ;; and inserts a bit of shadow dma that will later be referenced by the shadow bucket. + ;; but the original game did the same thing! + (&+! dma-ptr (size-of pc-shadow-request)) + (set! (-> pse dma-next dma) (new 'static 'dma-tag :id (dma-tag-id next) :addr (the-as int dma-ptr))) + (set! (-> pse dma-next vif0) (new 'static 'vif-tag)) + (set! (-> pse dma-next vif1) (new 'static 'vif-tag)) + ) + + dma-ptr + ) + ) + (defun draw-bones-shadow ((arg0 draw-control) (arg1 pointer) (arg2 pointer)) + ;; og:preserve-this + (when *use-pc-shadow* + (return (pc-draw-bones-shadow arg0 arg1 arg2)) + ) ;; (local-vars (ra-0 int)) ;; the dma packet we'll use for shadow in the end. (let* ((v1-0 (the-as dma-packet (&+ arg2 0))) @@ -1025,19 +1126,7 @@ (set! dma-buf (the pointer (&+ packet 16)))) dma-buf)) -;; when set, render some environment mapped stuff with jak 2's emerc. -;; this is much faster, and does significantly speed up the game thread on finalboss. -(define *emerc-hack* #t) -;; when set, use merc for blerc instead of generic. -(define *blerc-hack* #t) - -;; when true, uses the PC float blerc implementation. -(define *use-fp-blerc* #t) - -(define *texscroll-force-generic* #f) - -(define *ripple-force-generic* #f) (defun draw-bones ((arg0 draw-control) (dma-buf dma-buffer) (arg2 float)) "Main draw function for all bone-related renderers. Will set up merc, generic and shadow. diff --git a/goal_src/jak1/engine/gfx/shadow/shadow-cpu-h.gc b/goal_src/jak1/engine/gfx/shadow/shadow-cpu-h.gc index 5f9a61523ce..d3f2796dd9b 100644 --- a/goal_src/jak1/engine/gfx/shadow/shadow-cpu-h.gc +++ b/goal_src/jak1/engine/gfx/shadow/shadow-cpu-h.gc @@ -103,6 +103,35 @@ (define *shadow-queue* (new 'global 'shadow-queue)) +;; og:preserve-this +;; new shadow queue for drawing PC shadows. 
+(declare-type pc-shadow-request structure) +(deftype pc-shadow-request (structure) + ((dma-next dma-packet :inline) + (settings shadow-settings :inline) + (color vector :inline) + (geo-name string) ;; name to send to PC renderer + (mtx pointer) ;; pointer to DMA memory that will contain bones + (num-joints uint32) ;; number of joints needed for shadow + (next pc-shadow-request) + ) + ) + + +(deftype pc-shadow-run (structure) + ((first pc-shadow-request) + (next (pointer pc-shadow-request))) + ) + +(deftype pc-shadow-queue (structure) + ((cur-run uint32) + (run pc-shadow-run 16 :inline) + ) + ) + +(define *pc-shadow-queue* (new 'global 'pc-shadow-queue)) + + (deftype shadow-vertex (structure) ((x float) (y float) diff --git a/goal_src/jak1/engine/gfx/shadow/shadow-cpu.gc b/goal_src/jak1/engine/gfx/shadow/shadow-cpu.gc index a0eeda0a7b5..3f07ade932f 100644 --- a/goal_src/jak1/engine/gfx/shadow/shadow-cpu.gc +++ b/goal_src/jak1/engine/gfx/shadow/shadow-cpu.gc @@ -390,7 +390,38 @@ 0 (none)) +;; og:preserve-this +(defun pc-shadow-execute-all () + "Send PC shadow queue to the PC shadow renderer." + + ;; bail if disabled + (if (not (logtest? *vu1-enable-user* (vu1-renderer-mask shadow))) + (return #f) + ) + + ;; TODO: plus1 here? + (dotimes (run-idx (-> *pc-shadow-queue* cur-run)) + (when (nonzero? 
(-> *pc-shadow-queue* run run-idx first)) + (with-dma-buffer-add-bucket ((dma-buf (-> (current-frame) global-buf)) (bucket-id shadow)) + (dma-buffer-add-ref-vif2 + dma-buf + 6 + (-> *pc-shadow-queue* run run-idx first) + (new 'static 'vif-tag :cmd (vif-cmd pc-port)) + (new 'static 'vif-tag :cmd (vif-cmd pc-port)) + ) + ) + ) + ) + (none) + ) + (defun shadow-execute-all ((arg0 dma-buffer) (arg1 shadow-queue)) + ;; og:preserve-this + (when *use-pc-shadow* + (pc-shadow-execute-all) + (return #f) + ) (if *debug-segment* (add-frame (-> *display* frames (-> *display* on-screen) frame profile-bar 0) 'draw diff --git a/goal_src/jak2/engine/draw/drawable.gc b/goal_src/jak2/engine/draw/drawable.gc index fa4eac4a7bc..bad9fa22e89 100644 --- a/goal_src/jak2/engine/draw/drawable.gc +++ b/goal_src/jak2/engine/draw/drawable.gc @@ -1449,6 +1449,23 @@ ) ) ) + + ;; og:preserve-this + (let ((v1-0 *pc-shadow-globals*)) + (dotimes (a0-0 2) + (let ((a1-2 (-> v1-0 bucket a0-0))) + (set! (-> a1-2 first) (the-as pointer 0)) + (set! (-> a1-2 next) (the-as pointer 0)) + (set! (-> a1-2 shadow-color) (if (zero? a0-0) + (new 'static 'rgba :r #xf0 :g #xf0 :b #xf0 :a #x80) + (the-as rgba (-> *setting-control* user-current spotlight-color)) + ) + ) + (set! 
(-> a1-2 constants) (the-as shadow-vu1-constants 0)) + ) + ) + ) + (none) ) diff --git a/goal_src/jak2/engine/gfx/foreground/foreground.gc b/goal_src/jak2/engine/gfx/foreground/foreground.gc index 0321e558d27..28872599bb0 100644 --- a/goal_src/jak2/engine/gfx/foreground/foreground.gc +++ b/goal_src/jak2/engine/gfx/foreground/foreground.gc @@ -13,6 +13,8 @@ (eye 3) ) +;; when set, use the rewritten PC shadow render (faster) +(define *use-pc-shadow* #t) (def-mips2c foreground-draw-hud (function draw-control dma-buffer float none)) @@ -563,8 +565,101 @@ ) ) +;; og:preserve-this +(defun pc-draw-bones-shadow ((dc draw-control) (mtx pointer) (dma-ptr pointer)) + "Add shadows for this draw-control to the *pc-shadow-queue* to be drawn in pc-shadow-execute-all. + This places a pc-shadow-request in the DMA buffer and adds it to the linked list of requests + for the currently selected run in *pc-shadow-queue*" + + (when (-> dc shadow-ctrl) + ;; update the "center" position of the shadow control, I guess to let some other thing look at it? + (let ((center-pos (-> dc skeleton bones (-> dc shadow-joint-index) position)) + (settings (-> dc shadow-ctrl settings)) + ) + (set! (-> settings center x) (-> center-pos x)) + (set! (-> settings center y) (-> center-pos y)) + (set! (-> settings center z) (-> center-pos z)) + ) + ) + (let* ((pse (the pc-shadow-request dma-ptr)) + (sgeo (-> dc shadow)) + (settings (if (-> dc shadow-ctrl) (-> dc shadow-ctrl settings) *default-shadow-settings*)) + (flags (-> settings flags)) + ) + + ;; if fade is enabled, and we're all the way faded out, disable draw + (when (not (logtest? flags (shadow-flags disable-fade))) + (let ((dist (-> (scratchpad-object foreground-work) distance w))) + (#when PC_PORT + (if (not (-> *pc-settings* ps2-shadow?)) + (set! dist 0.0))) + (if (< (-> settings fade-dist) dist) + (logior! flags (shadow-flags disable-draw)) + ) + ) + ) + + ;; if disabled, early return + (if (logtest? 
flags (shadow-flags disable-draw)) + (return dma-ptr) + ) + + + ;; settings + (mem-copy! (the pointer (-> pse settings)) (the pointer settings) (size-of shadow-settings)) + + ;; update the "center" position. + (let ((center-pos (-> dc skeleton bones (-> dc shadow-joint-index) position))) + (set! (-> pse settings center x) (-> center-pos x)) + (set! (-> pse settings center y) (-> center-pos y)) + (set! (-> pse settings center z) (-> center-pos z)) + ) + + ;; set the other properties + (set! (-> pse geo-name) (-> sgeo name)) + (set! (-> pse mtx) mtx) + (set! (-> pse num-joints) (-> sgeo num-joints)) + ;; TODO: this is probably the wrong color!!! + (vector-copy! (-> pse color) (-> *time-of-day-context* current-shadow-color)) + + ;; set up linked list. + (let* ((run (-> *pc-shadow-globals* bucket (-> settings shadow-type))) + (next (-> run next)) + ) + + ;; if we're the first in the list, store in the run + (when (zero? (-> run first)) + (set! (-> run first) (the pointer pse))) + + ;; patch next pointer of previous + (if (nonzero? next) (set! (-> (the (pointer structure) next) 0) pse)) + + ;; remember where to patch for the next one + (set! (-> run next) (&-> pse next)) + + ;; clear our next pointer in case we're last + (set! (-> pse next) (the pc-shadow-request 0)) + + ;; set up next tag at the start, to skip over this data. + ;; this is a bit of a hack, this function gets called when building merc chains, + ;; and inserts a bit of shadow dma that will later be referenced by the shadow bucket. + ;; but the original game did the same thing! + (&+! dma-ptr (size-of pc-shadow-request)) + (set! (-> pse dma-next dma) (new 'static 'dma-tag :id (dma-tag-id next) :addr (the-as int dma-ptr))) + (set! (-> pse dma-next vif0) (new 'static 'vif-tag)) + (set! 
(-> pse dma-next vif1) (new 'static 'vif-tag)) + ) + + dma-ptr + ) + ) + ;; ERROR: Unsupported inline assembly instruction kind - [lq ra, 32(v1)] (defun foreground-shadow ((draw-ctrl draw-control) (arg1 (inline-array pris-mtx)) (arg2 pointer)) + ;; og:preserve-this + (when *use-pc-shadow* + (return (pc-draw-bones-shadow draw-ctrl (the pointer arg1) arg2)) + ) (local-vars (shadow-settings shadow-settings)) (let ((shadow-geo (-> draw-ctrl shadow)) (dist (-> (scratchpad-object foreground-work) distance w)) diff --git a/goal_src/jak2/engine/gfx/foreground/shadow-cpu-h.gc b/goal_src/jak2/engine/gfx/foreground/shadow-cpu-h.gc index c89f8d374cd..620f6547d0c 100644 --- a/goal_src/jak2/engine/gfx/foreground/shadow-cpu-h.gc +++ b/goal_src/jak2/engine/gfx/foreground/shadow-cpu-h.gc @@ -137,6 +137,21 @@ (define *shadow-globals* (new 'global 'shadow-globals)) +;; og:preserve-this +;; separate copy of shadow-globals for storing only the PC format requests. +(define *pc-shadow-globals* (new 'global 'shadow-globals)) +(declare-type pc-shadow-request structure) +(deftype pc-shadow-request (structure) + ((dma-next dma-packet :inline) + (settings shadow-settings :inline) + (color vector :inline) + (geo-name string) ;; name to send to PC renderer + (mtx pointer) ;; pointer to DMA memory that will contain bones + (num-joints uint32) ;; number of joints needed for shadow + (next pc-shadow-request) + ) + ) + (deftype shadow-vertex (structure) ((x float :offset-assert 0) (y float :offset-assert 4) diff --git a/goal_src/jak2/engine/gfx/foreground/shadow-cpu.gc b/goal_src/jak2/engine/gfx/foreground/shadow-cpu.gc index b0a0102668c..1043355e73c 100644 --- a/goal_src/jak2/engine/gfx/foreground/shadow-cpu.gc +++ b/goal_src/jak2/engine/gfx/foreground/shadow-cpu.gc @@ -641,9 +641,65 @@ (none) ) +;; og:preserve-this +(defun pc-shadow-execute-all () + "Send PC shadow queue to the PC shadow renderer." + + ;; bail if disabled + (if (not (logtest? 
(-> *display* vu1-enable-user) (vu1-renderer-mask shadow))) + (return #f) + ) + + (dotimes (i 2) + (when (nonzero? (-> *pc-shadow-globals* bucket i first)) + ;; patch the color of each request. + (let ((iter (the pc-shadow-request (-> *pc-shadow-globals* bucket i first))) + (color (new-stack-vector0)) + ) + + (cond + ((= i 0) + (vector-float*! color (-> *time-of-day-context* current-shadow-color) 128.0) + ) + (else + (let ((c (-> *pc-shadow-globals* bucket i shadow-color))) + (set! (-> color x) (the float (-> c r))) + (set! (-> color y) (the float (-> c g))) + (set! (-> color z) (the float (-> c b))) + ) + ) + ) + + (set! (-> color w) (the float (-> *pc-shadow-globals* bucket i shadow-color a))) + + (while (nonzero? iter) + ;(vector-copy! (-> iter color) color) + (set! (-> iter color quad) (-> color quad)) + (set! iter (-> iter next)) + ) + ) + + (with-dma-buffer-add-bucket ((dma-buf (-> (current-frame) global-buf)) (if (zero? i) (bucket-id shadow) (bucket-id shadow2))) + (dma-buffer-add-ref-vif2 + dma-buf + 6 + (-> *pc-shadow-globals* bucket i first) + (new 'static 'vif-tag :cmd (vif-cmd pc-port)) + (new 'static 'vif-tag :cmd (vif-cmd pc-port)) + ) + ) + ) + ) + (none) + ) + ;; ERROR: Failed store: (s.h! (+ v1-24 18) 0) at op 58 ;; ERROR: Failed store: (s.h! (+ v1-24 16) 0) at op 59 (defun shadow-execute-all ((arg0 dma-buffer)) + (when *use-pc-shadow* + (pc-shadow-execute-all) + (return #f) + ) (when *debug-segment* (let ((gp-0 (-> *display* frames (-> *display* on-screen) profile-array data 0)) (v1-7 'shadow) diff --git a/goal_src/jak3/engine/draw/drawable.gc b/goal_src/jak3/engine/draw/drawable.gc index b3aa0b2849b..591ac497b71 100644 --- a/goal_src/jak3/engine/draw/drawable.gc +++ b/goal_src/jak3/engine/draw/drawable.gc @@ -1441,6 +1441,29 @@ ) ) ) + + ;; og:preserve-this + (let ((v1-0 *pc-shadow-globals*)) + (dotimes (a0-0 3) + (let ((a1-2 (-> v1-0 bucket a0-0))) + (set! (-> a1-2 first) (the-as pointer 0)) + (set! (-> a1-2 next) (the-as pointer 0)) + (set! 
(-> a1-2 shadow-color) (cond + ((zero? a0-0) + (new 'static 'rgba :r #xf0 :g #xf0 :b #xf0 :a #x80) + ) + ((= a0-0 1) + (the-as rgba (-> *setting-control* user-current spotlight-color)) + ) + (else + (the-as rgba (-> *setting-control* user-current highlight-color)) + ) + ) + ) + (set! (-> a1-2 constants) (the-as shadow-vu1-constants 0)) + ) + ) + ) (none) ) diff --git a/goal_src/jak3/engine/gfx/foreground/foreground.gc b/goal_src/jak3/engine/gfx/foreground/foreground.gc index 8dd1133d026..3879ffad674 100644 --- a/goal_src/jak3/engine/gfx/foreground/foreground.gc +++ b/goal_src/jak3/engine/gfx/foreground/foreground.gc @@ -19,6 +19,9 @@ ;; Note: merc mode 5 seems to be totally bogus - the buckets are somewhat random. +;; when set, use the rewritten PC shadow render (faster) +(define *use-pc-shadow* #t) + ;; DECOMP BEGINS (define foreground-vu0-block (new 'static 'vu-function #|:length 9 :qlength 5|#)) @@ -825,8 +828,101 @@ ) ) +;; og:preserve-this +(defun pc-draw-bones-shadow ((dc draw-control) (mtx pointer) (dma-ptr pointer)) + "Add shadows for this draw-control to the *pc-shadow-queue* to be drawn in pc-shadow-execute-all. + This places a pc-shadow-request in the DMA buffer and adds it to the linked list of requests + for the currently selected run in *pc-shadow-queue*" + + (when (-> dc shadow-ctrl) + ;; update the "center" position of the shadow control, I guess to let some other thing look at it? + (let ((center-pos (-> dc skeleton bones (-> dc shadow-joint-index) position)) + (settings (-> dc shadow-ctrl settings)) + ) + (set! (-> settings center x) (-> center-pos x)) + (set! (-> settings center y) (-> center-pos y)) + (set! (-> settings center z) (-> center-pos z)) + ) + ) + (let* ((pse (the pc-shadow-request dma-ptr)) + (sgeo (-> dc shadow)) + (settings (if (-> dc shadow-ctrl) (-> dc shadow-ctrl settings) *default-shadow-settings*)) + (flags (-> settings flags)) + ) + + ;; if fade is enabled, and we're all the way faded out, disable draw + (when (not (logtest? 
flags (shadow-flags disable-fade))) + (let ((dist (-> (scratchpad-object foreground-work) distance w))) + (#when PC_PORT + (if (not (-> *pc-settings* ps2-shadow?)) + (set! dist 0.0))) + (if (< (-> settings fade-dist) dist) + (logior! flags (shadow-flags disable-draw)) + ) + ) + ) + + ;; if disabled, early return + (if (logtest? flags (shadow-flags disable-draw)) + (return dma-ptr) + ) + + + ;; settings + (mem-copy! (the pointer (-> pse settings)) (the pointer settings) (size-of shadow-settings)) + + ;; update the "center" position. + (let ((center-pos (-> dc skeleton bones (-> dc shadow-joint-index) position))) + (set! (-> pse settings center x) (-> center-pos x)) + (set! (-> pse settings center y) (-> center-pos y)) + (set! (-> pse settings center z) (-> center-pos z)) + ) + + ;; set the other properties + (set! (-> pse geo-name) (-> sgeo name)) + (set! (-> pse mtx) mtx) + (set! (-> pse num-joints) (-> sgeo num-joints)) + ;; TODO: this is probably the wrong color!!! + (vector-copy! (-> pse color) (-> *time-of-day-context* current-shadow-color)) + + ;; set up linked list. + (let* ((run (-> *pc-shadow-globals* bucket (-> settings shadow-type))) + (next (-> run next)) + ) + + ;; if we're the first in the list, store in the run + (when (zero? (-> run first)) + (set! (-> run first) (the pointer pse))) + + ;; patch next pointer of previous + (if (nonzero? next) (set! (-> (the (pointer structure) next) 0) pse)) + + ;; remember where to patch for the next one + (set! (-> run next) (&-> pse next)) + + ;; clear our next pointer in case we're last + (set! (-> pse next) (the pc-shadow-request 0)) + + ;; set up next tag at the start, to skip over this data. + ;; this is a bit of a hack, this function gets called when building merc chains, + ;; and inserts a bit of shadow dma that will later be referenced by the shadow bucket. + ;; but the original game did the same thing! + (&+! dma-ptr (size-of pc-shadow-request)) + (set! 
(-> pse dma-next dma) (new 'static 'dma-tag :id (dma-tag-id next) :addr (the-as int dma-ptr))) + (set! (-> pse dma-next vif0) (new 'static 'vif-tag)) + (set! (-> pse dma-next vif1) (new 'static 'vif-tag)) + ) + + dma-ptr + ) + ) + (defun foreground-shadow ((arg0 draw-control) (arg1 (inline-array pris-mtx)) (arg2 pointer)) "Generate DMA for shadow drawing." + ;; og:preserve-this + (when *use-pc-shadow* + (return (pc-draw-bones-shadow arg0 (the pointer arg1) arg2)) + ) (local-vars (a3-3 shadow-settings)) (let ((v1-0 (-> arg0 shadow)) (t0-0 (-> (scratchpad-object foreground-work) distance w)) diff --git a/goal_src/jak3/engine/gfx/foreground/shadow-cpu-h.gc b/goal_src/jak3/engine/gfx/foreground/shadow-cpu-h.gc index 30f796b63e9..fb7e8dec7ca 100644 --- a/goal_src/jak3/engine/gfx/foreground/shadow-cpu-h.gc +++ b/goal_src/jak3/engine/gfx/foreground/shadow-cpu-h.gc @@ -120,13 +120,28 @@ (define *shadow-globals* (new 'global 'shadow-globals)) - (set! (-> *shadow-globals* bucket 0 bucket-id) (bucket-id shadow)) - (set! (-> *shadow-globals* bucket 1 bucket-id) (bucket-id shadow2)) - (set! (-> *shadow-globals* bucket 2 bucket-id) (bucket-id shadow3)) +;; og:preserve-this +;; separate copy of shadow-globals for storing only the PC format requests. +(define *pc-shadow-globals* (new 'global 'shadow-globals)) +(declare-type pc-shadow-request structure) +(deftype pc-shadow-request (structure) + ((dma-next dma-packet :inline) + (settings shadow-settings :inline) + (color vector :inline) + (geo-name string) ;; name to send to PC renderer + (mtx pointer) ;; pointer to DMA memory that will contain bones + (num-joints uint32) ;; number of joints needed for shadow + (next pc-shadow-request) + ) + ) +(set! (-> *pc-shadow-globals* bucket 0 bucket-id) (bucket-id shadow)) +(set! (-> *pc-shadow-globals* bucket 1 bucket-id) (bucket-id shadow2)) +(set! 
(-> *pc-shadow-globals* bucket 2 bucket-id) (bucket-id shadow3)) + (deftype shadow-vertex (structure) ((x float) (y float) diff --git a/goal_src/jak3/engine/gfx/foreground/shadow-cpu.gc b/goal_src/jak3/engine/gfx/foreground/shadow-cpu.gc index 42104b62657..9e0b0fc54bc 100644 --- a/goal_src/jak3/engine/gfx/foreground/shadow-cpu.gc +++ b/goal_src/jak3/engine/gfx/foreground/shadow-cpu.gc @@ -659,9 +659,65 @@ (none) ) +;; og:preserve-this +(defun pc-shadow-execute-all () + "Send PC shadow queue to the PC shadow renderer." + + ;; bail if disabled + (if (not (logtest? (-> *display* vu1-enable-user) (vu1-renderer-mask rn32))) + (return #f) + ) + + (dotimes (i 3) + (when (nonzero? (-> *pc-shadow-globals* bucket i first)) + ;; patch the color of each request. + (let ((iter (the pc-shadow-request (-> *pc-shadow-globals* bucket i first))) + (color (new-stack-vector0)) + ) + + (cond + ((= i 0) + (vector-float*! color (-> *time-of-day-context* current-shadow-color) 128.0) + ) + (else + (let ((c (-> *pc-shadow-globals* bucket i shadow-color))) + (set! (-> color x) (the float (-> c r))) + (set! (-> color y) (the float (-> c g))) + (set! (-> color z) (the float (-> c b))) + ) + ) + ) + + (set! (-> color w) (the float (-> *pc-shadow-globals* bucket i shadow-color a))) + + (while (nonzero? iter) + ;(vector-copy! (-> iter color) color) + (set! (-> iter color quad) (-> color quad)) + (set! iter (-> iter next)) + ) + ) + + (with-dma-buffer-add-bucket ((dma-buf (-> (current-frame) global-buf)) (-> *pc-shadow-globals* bucket i bucket-id)) + (dma-buffer-add-ref-vif2 + dma-buf + 6 + (-> *pc-shadow-globals* bucket i first) + (new 'static 'vif-tag :cmd (vif-cmd pc-port)) + (new 'static 'vif-tag :cmd (vif-cmd pc-port)) + ) + ) + ) + ) + (none) + ) + ;; ERROR: Failed store: (s.h! (+ v1-25 18) 0) at op 61 ;; ERROR: Failed store: (s.h! 
(+ v1-25 16) 0) at op 62 (defun shadow-execute-all ((arg0 dma-buffer)) + (when *use-pc-shadow* + (pc-shadow-execute-all) + (return #f) + ) (when *debug-segment* (let ((gp-0 (-> *display* frames (-> *display* on-screen) profile-array data 0)) (v1-7 'other) diff --git a/scripts/sublime_text/lispindent.sublime-settings b/scripts/sublime_text/lispindent.sublime-settings index 836528e45f3..1275c0717f5 100644 --- a/scripts/sublime_text/lispindent.sublime-settings +++ b/scripts/sublime_text/lispindent.sublime-settings @@ -6,7 +6,7 @@ "regex": ["(with-gensyms|defenum|countdown|while|defglobalconstant|desfun|defsmacro|catch|defvar|defclass|defconstant|defcustom|defparameter|defconst|define-condition|define-modify-macro|", "defsetf|defun|defgeneric|define-setf-method|define-self-expander|defmacro|defsubst|deftype|defmethod|", - "defpackage|defstruct|dolist|dotimes|lambda|let|let\\*|prog1|prog2|unless|when)$"] + "defpackage|defstruct|dolist|dotimes|lambda|let|let\\*|prog1|prog2|unless|when|with-dma-buffer-add-bucket)$"] } } }