feat: add 'user-content-' prefix to support github markdown fragment by kemingy · Pull Request #1750 · lycheeverse/lychee (original) (raw)

The check method is starting to contain quite a lot of fragment business logic. Maybe it's time to move it out?

We could introduce a new struct and move your logic there:

struct FragmentCandidates { candidates: Vec<Cow<'static, str>>, }

impl FragmentCandidates { fn new(fragment: &str, url: &Url, file_type: FileType) -> Result<Self, std::str::Utf8Error> { let mut fragment_candidates = vec![Cow::Owned(fragment.to_string())];

    // For GitHub links, add "user-content-" prefix
    if url.host_str().is_some_and(|host| host.ends_with("github.com")) {
        fragment_candidates.push(format!("user-content-{fragment}").into());
    }
    
    let mut all_fragments = fragment_candidates.clone();
    
    for fragment in &fragment_candidates {
        let mut fragment_decoded = percent_decode_str(fragment).decode_utf8()?;
        if file_type == FileType::Markdown {
            fragment_decoded = fragment_decoded.to_lowercase().into();
        }
        all_fragments.push(fragment_decoded);
    }
    
    Ok(Self { candidates: all_fragments })
}

fn any_matches(&self, file_fragments: &HashSet<String>) -> bool {
    self.candidates.iter().any(|frag| file_fragments.contains(frag.as_ref()))
}

}

And the check method would become:

pub(crate) async fn check(&self, input: FragmentInput, url: &Url) -> Result { let Some(fragment) = url.fragment() else { return Ok(true); }; if fragment.is_empty() || fragment.eq_ignore_ascii_case("top") { return Ok(true); }

let FragmentInput { content, file_type } = input;

if file_type == FileType::Plaintext {
    return Ok(true);
}

let fragment_candidates = FragmentCandidates::new(fragment, url, file_type)?;
let url_without_frag = Self::remove_fragment(url.clone());

let extractor = match file_type {
    FileType::Markdown => extract_markdown_fragments,
    FileType::Html => extract_html_fragments,
    FileType::Plaintext => return Ok(true),
};

match self.cache.lock().await.entry(url_without_frag) {
    Entry::Vacant(entry) => {
        let file_frags = extractor(&content);
        let contains_fragment = fragment_candidates.any_matches(&file_frags);
        entry.insert(file_frags);
        Ok(contains_fragment)
    }
    Entry::Occupied(entry) => {
        Ok(fragment_candidates.any_matches(entry.get()))
    }
}

}

There might be things I'm missing, so it might not compile, but you get the idea. This makes testing way easier. We can then easily add some unit test to FragmentCandidates. Also thought of calling it FragmentGenerator or FragmentBuilder.