[ty] Reduce number of inline stored definitions per place by MichaReiser · Pull Request #19409 · astral-sh/ruff (original) (raw)

Subject: [PATCH] Reduce inline size of live declarations

Index: crates/ty_python_semantic/src/semantic_index/use_def.rs IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8

diff --git a/crates/ty_python_semantic/src/semantic_index/use_def.rs b/crates/ty_python_semantic/src/semantic_index/use_def.rs --- a/crates/ty_python_semantic/src/semantic_index/use_def.rs (revision b8d2037373ee1aafe0574e4a6d202c609a7de6ad) +++ b/crates/ty_python_semantic/src/semantic_index/use_def.rs (date 1752778903756) @@ -270,7 +270,7 @@ mod place_state; /// Applicable definitions and constraints for every use of a name. -#[derive(Debug, PartialEq, Eq, salsa::Update, get_size2::GetSize)] +#[derive(Debug, PartialEq, Eq, salsa::Update)] pub(crate) struct UseDefMap<'db> { /// Array of [Definition] in this scope. Only the first entry should be [DefinitionState::Undefined]; /// this represents the implicit "unbound"/"undeclared" definition of every place. @@ -341,6 +341,50 @@ end_of_scope_reachability: ScopedReachabilityConstraintId, } +pub(crate) fn use_def_map_size(map: &UseDefMap<'_>) -> usize { + use ruff_db::increment_memory_usage; + + let all_definitions = ::get_size2::GetSize::get_heap_size(&map.all_definitions); + increment_memory_usage("all_definitions", all_definitions); + let predicates = ::get_size2::GetSize::get_heap_size(&map.predicates); + increment_memory_usage("predicates", predicates); + let narrowing_constraints = ::get_size2::GetSize::get_heap_size(&map.narrowing_constraints); + increment_memory_usage("narrowing_constraints", narrowing_constraints); + let reachability_constraints = + ::get_size2::GetSize::get_heap_size(&map.reachability_constraints); + increment_memory_usage("reachability_constraints", reachability_constraints); + let bindings_by_use = ::get_size2::GetSize::get_heap_size(&map.bindings_by_use); + increment_memory_usage("bindings_by_use", bindings_by_use); + let node_reachability = ::get_size2::GetSize::get_heap_size(&map.node_reachability); + increment_memory_usage("node_reachability", node_reachability); + let declarations_by_binding = ::get_size2::GetSize::get_heap_size(&map.declarations_by_binding); + 
increment_memory_usage("declarations_by_binding", declarations_by_binding); + let bindings_by_definition = ::get_size2::GetSize::get_heap_size(&map.bindings_by_definition); + increment_memory_usage("bindings_by_definition", bindings_by_definition); + let end_of_scope_places = ::get_size2::GetSize::get_heap_size(&map.end_of_scope_places); + increment_memory_usage("end_of_scope_places", end_of_scope_places); + let reachable_definitions = ::get_size2::GetSize::get_heap_size(&map.reachable_definitions); + increment_memory_usage("reachable_definitions", reachable_definitions); + let eager_snapshots = ::get_size2::GetSize::get_heap_size(&map.eager_snapshots); + increment_memory_usage("eager_snapshots", eager_snapshots); + let end_of_scope_reachability = + ::get_size2::GetSize::get_heap_size(&map.end_of_scope_reachability); + increment_memory_usage("end_of_scope_reachability", end_of_scope_reachability); + + 0 + all_definitions + + predicates + + narrowing_constraints + + reachability_constraints + + bindings_by_use + + node_reachability + + declarations_by_binding + + bindings_by_definition + + end_of_scope_places + + reachable_definitions + + eager_snapshots + + end_of_scope_reachability +} + pub(crate) enum ApplicableConstraints<'map, 'db> { UnboundBinding(ConstraintsIterator<'map, 'db>), ConstrainedBindings(BindingWithConstraintsIterator<'map, 'db>), Index: crates/ty_python_semantic/src/semantic_index.rs IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8

diff --git a/crates/ty_python_semantic/src/semantic_index.rs b/crates/ty_python_semantic/src/semantic_index.rs --- a/crates/ty_python_semantic/src/semantic_index.rs (revision b8d2037373ee1aafe0574e4a6d202c609a7de6ad) +++ b/crates/ty_python_semantic/src/semantic_index.rs (date 1752778603598) @@ -5,12 +5,6 @@ use ruff_db::parsed::parsed_module; use ruff_index::{IndexSlice, IndexVec}; -use ruff_python_ast::NodeIndex; -use ruff_python_parser::semantic_errors::SemanticSyntaxError; -use rustc_hash::{FxHashMap, FxHashSet}; -use salsa::Update; -use salsa::plumbing::AsId;

use crate::Db; use crate::module_name::ModuleName; use crate::node_key::NodeKey; @@ -26,7 +20,12 @@ }; use crate::semantic_index::use_def::{EagerSnapshotKey, ScopedEagerSnapshotId, UseDefMap}; use crate::semantic_model::HasTrackedScope; -use crate::util::get_size::untracked_arc_size; +use ruff_db::increment_memory_usage; +use ruff_python_ast::NodeIndex; +use ruff_python_parser::semantic_errors::SemanticSyntaxError; +use rustc_hash::{FxHashMap, FxHashSet}; +use salsa::Update; +use salsa::plumbing::AsId; pub mod ast_ids; mod builder; @@ -49,7 +48,7 @@ /// Returns the semantic index for file. /// /// Prefer using [symbol_table] when working with symbols from a single scope. -#[salsa::tracked(returns(ref), no_eq, heap_size=get_size2::GetSize::get_heap_size)] +#[salsa::tracked(returns(ref), no_eq, heap_size=semantic_index_size)] pub(crate) fn semantic_index(db: &dyn Db, file: File) -> SemanticIndex<'_> { let _span = tracing::trace_span!("semantic_index", ?file).entered(); @@ -93,7 +92,7 @@ /// Using [use_def_map] over [semantic_index] has the advantage that /// Salsa can avoid invalidating dependent queries if this scope's use-def map /// is unchanged. -#[salsa::tracked(returns(deref), heap_size=get_size2::GetSize::get_heap_size)] +#[salsa::tracked(returns(deref), heap_size=use_def_map_heap_size)] pub(crate) fn use_def_map<'db>(db: &'db dyn Db, scope: ScopeId<'db>) -> ArcUseDefMap<'db> { let file = scope.file(db); let _span = tracing::trace_span!("use_def_map", scope=?scope.as_id(), ?file).entered(); @@ -196,7 +195,7 @@ } /// The place tables and use-def maps for all scopes in a file. -#[derive(Debug, Update, get_size2::GetSize)] +#[derive(Debug, Update)] pub(crate) struct SemanticIndex<'db> { /// List of all place tables in this file, indexed by scope. 
place_tables: IndexVec<FileScopeId, Arc<PlaceTable>>, @@ -244,6 +243,60 @@ generator_functions: FxHashSet<FileScopeId>, } +pub(crate) fn semantic_index_size(index: &SemanticIndex<'_>) -> usize { + let place_tables = ::get_size2::GetSize::get_heap_size(&index.place_tables); + increment_memory_usage("places_tables", place_tables); + let scopes = ::get_size2::GetSize::get_heap_size(&index.scopes); + increment_memory_usage("scopes", scopes); + let scopes_by_expression = ::get_size2::GetSize::get_heap_size(&index.scopes_by_expression); + increment_memory_usage("scopes_by_expression", scopes_by_expression); + let definitions_by_node = ::get_size2::GetSize::get_heap_size(&index.definitions_by_node); + increment_memory_usage("definitions_by_node", definitions_by_node); + let expressions_by_node = ::get_size2::GetSize::get_heap_size(&index.expressions_by_node); + increment_memory_usage("expressions_by_node", expressions_by_node); + let scopes_by_node = ::get_size2::GetSize::get_heap_size(&index.scopes_by_node); + increment_memory_usage("scopes_by_node", scopes_by_node); + let scope_ids_by_scope = ::get_size2::GetSize::get_heap_size(&index.scope_ids_by_scope); + increment_memory_usage("scope_ids_by_scope", scope_ids_by_scope); + let use_def_maps = ::get_size2::GetSize::get_heap_size(&index.use_def_maps); + increment_memory_usage("use_def_maps", use_def_maps); + let ast_ids = ::get_size2::GetSize::get_heap_size(&index.ast_ids); + increment_memory_usage("ast_ids", ast_ids); + let imported_modules = ::get_size2::GetSize::get_heap_size(&index.imported_modules); + increment_memory_usage("imported_modules", imported_modules); + let has_future_annotations = ::get_size2::GetSize::get_heap_size(&index.has_future_annotations); + increment_memory_usage("has_future_annotations", has_future_annotations); + let eager_snapshots = ::get_size2::GetSize::get_heap_size(&index.eager_snapshots); + increment_memory_usage("eager_snapshots", eager_snapshots); + let semantic_syntax_errors = 
::get_size2::GetSize::get_heap_size(&index.semantic_syntax_errors); + increment_memory_usage("semantic_syntax_errors", semantic_syntax_errors); + let generator_functions = ::get_size2::GetSize::get_heap_size(&index.generator_functions); + increment_memory_usage("generator_functions", generator_functions); + + let total = 0 + + place_tables + + scopes + + scopes_by_expression + + definitions_by_node + + expressions_by_node + + use_def_maps + + ast_ids + + scopes_by_node + + scope_ids_by_scope + + imported_modules + + has_future_annotations + + eager_snapshots + + semantic_syntax_errors + + generator_functions; + + increment_memory_usage("semantic_index", total); + total +} + +pub(crate) fn use_def_map_heap_size(map: &ArcUseDefMap<'_>) -> usize { + crate::semantic_index::use_def::use_def_map_size(&map.inner) +} + impl<'db> SemanticIndex<'db> { /// Returns the place table for a specific scope. /// @@ -521,9 +574,8 @@ } } -#[derive(Debug, PartialEq, Eq, Clone, salsa::Update, get_size2::GetSize)] +#[derive(Debug, PartialEq, Eq, Clone, salsa::Update)] pub(crate) struct ArcUseDefMap<'db> { - #[get_size(size_fn = untracked_arc_size)] inner: Arc<UseDefMap<'db>>, } @@ -543,6 +595,12 @@ } } +impl<'db> get_size2::GetSize for ArcUseDefMap<'db> { + fn get_heap_size(&self) -> usize { + crate::semantic_index::use_def::use_def_map_size(&self.inner) + } +} + pub struct AncestorsIter<'a> { scopes: &'a IndexSlice<FileScopeId, Scope>, next_id: Option<FileScopeId>, Index: crates/ty/src/lib.rs IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8

diff --git a/crates/ty/src/lib.rs b/crates/ty/src/lib.rs --- a/crates/ty/src/lib.rs (revision b8d2037373ee1aafe0574e4a6d202c609a7de6ad) +++ b/crates/ty/src/lib.rs (date 1752778603581) @@ -22,8 +22,8 @@ use crossbeam::channel as crossbeam_channel; use rayon::ThreadPoolBuilder; use ruff_db::diagnostic::{Diagnostic, DisplayDiagnosticConfig, Severity}; -use ruff_db::max_parallelism; use ruff_db::system::{OsSystem, SystemPath, SystemPathBuf}; +use ruff_db::{max_parallelism, take_memory_usage}; use salsa::plumbing::ZalsaDatabase; use ty_project::metadata::options::ProjectOptionsOverrides; use ty_project::watch::ProjectWatcher; @@ -155,7 +155,10 @@ match std::env::var(EnvVars::TY_MEMORY_REPORT).as_deref() { Ok("short") => write!(stdout, "{}", db.salsa_memory_dump().display_short())?, Ok("mypy_primer") => write!(stdout, "{}", db.salsa_memory_dump().display_mypy_primer())?, - Ok("full") => write!(stdout, "{}", db.salsa_memory_dump().display_full())?, + Ok("full") => { + write!(stdout, "{}", db.salsa_memory_dump().display_full())?; + write!(stdout, "{:#?}", take_memory_usage())?; + } _ => {} } Index: crates/ruff_db/src/lib.rs IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8

diff --git a/crates/ruff_db/src/lib.rs b/crates/ruff_db/src/lib.rs --- a/crates/ruff_db/src/lib.rs (revision b8d2037373ee1aafe0574e4a6d202c609a7de6ad) +++ b/crates/ruff_db/src/lib.rs (date 1752778603574) @@ -43,6 +43,25 @@ VERSION.set(version) }

+thread_local! {

+} + +pub fn take_memory_usage() -> FxDashMap<String, usize> {

+} + +pub fn increment_memory_usage(ty: &str, amount: usize) {

+} + /// Most basic database that gives access to files, the host system, source code, and parsed AST. #[salsa::db] pub trait Db: salsa::Database {