Skip to content
This repository was archived by the owner on Nov 4, 2024. It is now read-only.

Commit 0e29dee

Browse files
committed
updated
1 parent e4b83d6 commit 0e29dee

File tree

2 files changed

+170
-16
lines changed

2 files changed

+170
-16
lines changed

src/db.rs

Lines changed: 140 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
use std::sync::Arc;
2+
use chrono::offset::Utc;
3+
use tokio::sync::Mutex;
14
use mongodb::bson::{doc, document::Document};
25
//use mongodb::{options::ClientOptions, options::FindOptions, Client, Collection};
3-
use mongodb::{options::ClientOptions, options::FindOptions, Client, Cursor};
6+
use mongodb::{options::ClientOptions, options::FindOptions, options::InsertManyOptions, Client, Cursor};
47
//use serde::{Deserialize, Serialize};
58
use futures::StreamExt;
69
//use clap::ArgMatches;
@@ -19,19 +22,32 @@ type BoxResult<T> = std::result::Result<T, Box<dyn error::Error + Send + Sync>>;
1922
impl DB {
2023
pub async fn init(url: &str, db: &str) -> BoxResult<Self> {
2124
let mut client_options = ClientOptions::parse(url).await?;
22-
client_options.app_name = Some("json-bucket".to_string());
25+
client_options.app_name = Some("mongodb-stream-rs".to_string());
2326
Ok(Self {
2427
client: Client::with_options(client_options)?,
2528
db: db.to_owned(),
2629
counter: Counter::new()
2730
})
2831
}
2932

30-
pub async fn find(&mut self, collection: &str, query: Document) -> BoxResult<(Cursor, f64)> {
33+
pub async fn find(&mut self, collection: &str, query: Document, bulk_size: Option<u64>) -> BoxResult<(Cursor, f64)> {
3134
// Log which collection this is going into
3235
log::debug!("Reading {}.{}", self.db, collection);
3336

37+
let batch_size = match bulk_size {
38+
Some(bulk_size) => {
39+
if bulk_size > 64000 {
40+
log::info!("Setting mongo cursor batch_size to 64000");
41+
Some(64000u32)
42+
} else {
43+
Some(bulk_size as u32)
44+
}
45+
},
46+
None => None
47+
};
48+
3449
let find_options = FindOptions::builder()
50+
.batch_size(batch_size)
3551
.sort(doc! { "_id": -1 })
3652
.build();
3753

@@ -48,27 +64,93 @@ impl DB {
4864
pub async fn insert_cursor(&mut self, collection: &str, mut cursor: Cursor, total: f64) -> BoxResult<()> {
4965
let coll = self.client.database(&self.db).collection(collection);
5066
self.counter.total(total);
51-
log::info!("Inserting {} docs", total);
67+
log::info!("Inserting {} docs to {}.{}", total, self.db, collection);
68+
69+
// Get timestamp
70+
let start = Utc::now().timestamp();
71+
5272
while let Some(doc) = cursor.next().await {
5373
match doc {
5474
Ok(doc) => {
5575
match coll.insert_one(doc, None).await {
5676
Ok(id) => {
57-
log::info!("Inserted id: {}", id.inserted_id.to_string());
77+
log::debug!("Inserted id: {}", id.inserted_id.to_string());
5878
}
5979
Err(e) => {
60-
log::error!("Got error: {}", e);
80+
log::debug!("Got error: {}", e);
81+
}
82+
}
83+
self.counter.incr(&self.db, collection, 1.0, start);
84+
}
85+
Err(e) => {
86+
log::error!("Caught error getting next doc, skipping: {}", e);
87+
continue;
88+
}
89+
};
90+
}
91+
log::info!("Completed {}.{}", self.db, collection);
92+
Ok(())
93+
}
94+
95+
pub async fn bulk_insert_cursor(&mut self, collection: &str, mut cursor: Cursor, total: f64, bulk_count: usize) -> BoxResult<()> {
96+
let coll = self.client.database(&self.db).collection(collection);
97+
self.counter.total(total);
98+
log::info!("Bulk inserting {} docs to {}.{} in batches of {}", total, self.db, collection, bulk_count);
99+
100+
let insert_many_options = InsertManyOptions::builder()
101+
.ordered(Some(false))
102+
.build();
103+
104+
// Create vector of documents to bulk upload
105+
// let mut bulk = Bulk::new(bulk_count);
106+
let mut bulk: Vec<Document> = Vec::with_capacity(bulk_count);
107+
108+
// Get timestamp
109+
let start = Utc::now().timestamp();
110+
111+
while let Some(doc) = cursor.next().await {
112+
match doc {
113+
Ok(d) => {
114+
bulk.push(d);
115+
if bulk.len() >= bulk.capacity() {
116+
match coll.insert_many(*bulk, insert_many_options.clone()).await {
117+
Ok(_) => {
118+
log::debug!("Bulk inserted {} docs", bulk_count);
119+
}
120+
Err(e) => {
121+
log::debug!("Got error with insertMany: {}", e);
122+
}
61123
}
124+
bulk.clear();
125+
self.counter.incr(&self.db, collection, bulk_count as f64, start);
126+
} else {
127+
continue
62128
}
63-
self.counter.incr(&self.db, collection);
129+
// if bulk.push(d).await {
130+
// log::debug!("Bulk inserting {} docs", bulk_count);
131+
// let values = bulk.get().await;
132+
// match coll.insert_many(values, insert_many_options.clone()).await {
133+
// Ok(_) => {
134+
// log::debug!("Bulk inserted {} docs", bulk_count);
135+
// }
136+
// Err(e) => {
137+
// log::debug!("Got error with insertMany: {}", e);
138+
// }
139+
// }
140+
// bulk.clear().await;
141+
// self.counter.incr(&self.db, collection, bulk_count as f64, start);
142+
// } else {
143+
// log::debug!("inserted doc: {}/{}", bulk.len().await, bulk_count);
144+
// continue
145+
// }
64146
}
65147
Err(e) => {
66148
log::error!("Caught error getting next doc, skipping: {}", e);
67149
continue;
68150
}
69151
};
70152
}
71-
println!("Completed {}.{}", self.db, collection);
153+
log::info!("Completed {}.{}", self.db, collection);
72154
Ok(())
73155
}
74156

@@ -125,21 +207,67 @@ impl Counter {
125207
Counter {
126208
count: 0.0,
127209
marker: 0.0,
128-
total: 0.0
210+
total: 0.0,
129211
}
130212
}
131213

132214
/// Store the expected number of documents; `incr` divides the running count by
/// this value to compute the completion percentage.
pub fn total(&mut self, total: f64) {
133215
self.total = total;
134216
}
135217

136-
pub fn incr(&mut self, db: &str, collection: &str) {
137-
self.count += 1f64;
218+
pub fn incr(&mut self, db: &str, collection: &str, count: f64, start: i64) {
219+
self.count += count;
138220
let percent = self.count / self.total * 100.0;
139221

222+
// Get time elapsed
223+
let now = Utc::now().timestamp();
224+
let delta = now - start;
225+
226+
// Get insert rate
227+
let rate = self.count / delta as f64;
228+
140229
if percent - self.marker > 1.0 {
141-
println!("Copying {}.{}: {:.2}%", db, collection, percent);
230+
log::info!("{}.{}: {:.2}%, {:.2}/s, {}/{}", db, collection, percent, rate, self.count, self.total);
142231
self.marker += 1f64;
143232
};
144233
}
145234
}
235+
236+
#[derive(Debug, Clone)]
237+
pub struct Bulk {
238+
pub inner: Arc<Mutex<Vec<Document>>>
239+
}
240+
241+
impl Bulk {
242+
pub fn new(size: usize) -> Bulk {
243+
Bulk {
244+
inner: Arc::new(Mutex::new(Vec::with_capacity(size)))
245+
}
246+
}
247+
248+
pub async fn push(&mut self, doc: Document) -> bool {
249+
let mut me = self.inner.lock().await;
250+
me.push(doc);
251+
252+
if me.len() >= me.capacity() {
253+
return true
254+
} else {
255+
return false
256+
}
257+
}
258+
259+
pub async fn len(&self) -> usize {
260+
let me = self.inner.lock().await;
261+
me.len()
262+
}
263+
264+
pub async fn get(&self) -> Vec<Document> {
265+
let me = self.inner.lock().await;
266+
me.to_vec()
267+
}
268+
269+
pub async fn clear(&self) {
270+
let mut me = self.inner.lock().await;
271+
me.clear()
272+
}
273+
}

src/main.rs

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,16 @@ async fn main() -> BoxResult<()> {
5555
.help("MongoDB Collection")
5656
.takes_value(true),
5757
)
58+
.arg(
59+
Arg::with_name("bulk")
60+
.short("b")
61+
.long("bulk")
62+
.required(false)
63+
.value_name("STREAM_BULK")
64+
.env("STREAM_BULK")
65+
.help("Bulk stream documents")
66+
.takes_value(true)
67+
)
5868
.get_matches();
5969

6070
// Initialize log Builder
@@ -69,7 +79,7 @@ async fn main() -> BoxResult<()> {
6979
)
7080
})
7181
.target(Target::Stdout)
72-
.filter_level(LevelFilter::Error)
82+
.filter_level(LevelFilter::Info)
7383
.parse_default_env()
7484
.init();
7585

@@ -78,6 +88,10 @@ async fn main() -> BoxResult<()> {
7888
let destination= &opts.value_of("destination_uri").unwrap();
7989
let collection = &opts.value_of("collection").unwrap();
8090
let db = &opts.value_of("db").unwrap();
91+
let bulk = match opts.is_present("bulk") {
92+
true => Some(opts.value_of("bulk").unwrap().parse::<u32>()?),
93+
false => None
94+
};
8195

8296
println!(
8397
"Starting mongodb-stream-rs:{}",
@@ -88,10 +102,22 @@ async fn main() -> BoxResult<()> {
88102
let mut source_db = DB::init(&source, &db).await?;
89103
let mut destination_db = DB::init(&destination, &db).await?;
90104

91-
// Acquire cursor from source
92-
let (source_cursor,total) = source_db.find(collection, doc!{}).await?;
105+
// If bulk flag is set, use insertMany
106+
match bulk {
107+
Some(bulk_size) => {
108+
// Acquire cursor from source
109+
let (source_cursor,total) = source_db.find(collection, doc!{}, Some(bulk_size as u64)).await?;
110+
111+
destination_db.bulk_insert_cursor(collection, source_cursor, total, bulk_size as usize).await?;
112+
}
113+
None => {
114+
// Acquire cursor from source
115+
let (source_cursor,total) = source_db.find(collection, doc!{}, None).await?;
116+
117+
destination_db.insert_cursor(collection, source_cursor, total).await?
118+
}
93119

94-
destination_db.insert_cursor(collection, source_cursor, total).await?;
120+
};
95121

96122
Ok(())
97123
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy