Initial commit
This commit is contained in:
commit
6a206031fb
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
/target
|
1678
Cargo.lock
generated
Normal file
1678
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
10
Cargo.toml
Normal file
10
Cargo.toml
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
[package]
|
||||||
|
name = "markify-rs"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
reqwest = { version = "0.11", features = ["blocking", "json"] }
|
||||||
|
scraper = "0.12"
|
38
src/main.rs
Normal file
38
src/main.rs
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
use reqwest::header::{HeaderMap, USER_AGENT};
|
||||||
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
|
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let args: Vec<String> = std::env::args().collect();
|
||||||
|
if args.len() < 2 {
|
||||||
|
eprintln!("Usage: {} <URL> [g]", args[0]);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let url = &args[1];
|
||||||
|
let use_google_bot = args.get(2).map(|s| s == "g").unwrap_or(false);
|
||||||
|
|
||||||
|
let mut headers = HeaderMap::new();
|
||||||
|
if use_google_bot {
|
||||||
|
headers.insert(USER_AGENT, "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)".parse()?);
|
||||||
|
}
|
||||||
|
|
||||||
|
let client = reqwest::blocking::Client::builder().default_headers(headers).build()?;
|
||||||
|
let res = client.get(url).send()?;
|
||||||
|
|
||||||
|
let body = res.text()?;
|
||||||
|
|
||||||
|
let document = Html::parse_document(&body);
|
||||||
|
let title_selector = Selector::parse("title").unwrap();
|
||||||
|
let title = document.select(&title_selector).next().map(|title| title.text().collect::<Vec<_>>().join(""));
|
||||||
|
|
||||||
|
match title {
|
||||||
|
Some(title) => {
|
||||||
|
println!("[{}]({})", title.trim(), url);
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
eprintln!("Title tag not found in the response from {}", url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
Loading…
Reference in a new issue