+ use std::sync::Arc;
+ use chrono::offset::Utc;
+ use tokio::sync::Mutex;
use mongodb::bson::{doc, document::Document};
//use mongodb::{options::ClientOptions, options::FindOptions, Client, Collection};
- use mongodb::{options::ClientOptions, options::FindOptions, Client, Cursor};
+ use mongodb::{options::ClientOptions, options::FindOptions, options::InsertManyOptions, Client, Cursor};
//use serde::{Deserialize, Serialize};
use futures::StreamExt;
//use clap::ArgMatches;
@@ -19,19 +22,32 @@ type BoxResult<T> = std::result::Result<T, Box<dyn error::Error + Send + Sync>>;
impl DB {
    pub async fn init(url: &str, db: &str) -> BoxResult<Self> {
        let mut client_options = ClientOptions::parse(url).await?;
-         client_options.app_name = Some("json-bucket".to_string());
+         client_options.app_name = Some("mongodb-stream-rs".to_string());

        Ok(Self {
            client: Client::with_options(client_options)?,
            db: db.to_owned(),
            counter: Counter::new()
        })
    }

-     pub async fn find(&mut self, collection: &str, query: Document) -> BoxResult<(Cursor, f64)> {
+     pub async fn find(&mut self, collection: &str, query: Document, bulk_size: Option<u64>) -> BoxResult<(Cursor, f64)> {
        // Log which collection this is going into
        log::debug!("Reading {}.{}", self.db, collection);

+         // Cap the cursor batch size at 64,000 documents
+         let batch_size = match bulk_size {
+             Some(bulk_size) => {
+                 if bulk_size > 64000 {
+                     log::info!("Setting mongo cursor batch_size to 64000");
+                     Some(64000u32)
+                 } else {
+                     Some(bulk_size as u32)
+                 }
+             },
+             None => None
+         };
+
        let find_options = FindOptions::builder()
+             .batch_size(batch_size)
            .sort(doc! { "_id": -1 })
            .build();

@@ -48,27 +64,93 @@ impl DB {
    pub async fn insert_cursor(&mut self, collection: &str, mut cursor: Cursor, total: f64) -> BoxResult<()> {
        let coll = self.client.database(&self.db).collection(collection);
        self.counter.total(total);
-         log::info!("Inserting {} docs", total);
+         log::info!("Inserting {} docs to {}.{}", total, self.db, collection);
+
+         // Get timestamp
+         let start = Utc::now().timestamp();
+
        while let Some(doc) = cursor.next().await {
            match doc {
                Ok(doc) => {
                    match coll.insert_one(doc, None).await {
                        Ok(id) => {
-                             log::info!("Inserted id: {}", id.inserted_id.to_string());
+                             log::debug!("Inserted id: {}", id.inserted_id.to_string());
                        }
                        Err(e) => {
-                             log::error!("Got error: {}", e);
+                             log::debug!("Got error: {}", e);
+                         }
+                     }
+                     self.counter.incr(&self.db, collection, 1.0, start);
+                 }
+                 Err(e) => {
+                     log::error!("Caught error getting next doc, skipping: {}", e);
+                     continue;
+                 }
+             };
+         }
+         log::info!("Completed {}.{}", self.db, collection);
+         Ok(())
+     }
+
+     pub async fn bulk_insert_cursor(&mut self, collection: &str, mut cursor: Cursor, total: f64, bulk_count: usize) -> BoxResult<()> {
+         let coll = self.client.database(&self.db).collection(collection);
+         self.counter.total(total);
+         log::info!("Bulk inserting {} docs to {}.{} in batches of {}", total, self.db, collection, bulk_count);
+
+         let insert_many_options = InsertManyOptions::builder()
+             .ordered(Some(false))
+             .build();
+
+         // Create vector of documents to bulk upload
+         // let mut bulk = Bulk::new(bulk_count);
+         let mut bulk: Vec<Document> = Vec::with_capacity(bulk_count);
+
+         // Get timestamp
+         let start = Utc::now().timestamp();
+
+         while let Some(doc) = cursor.next().await {
+             match doc {
+                 Ok(d) => {
+                     bulk.push(d);
+                     if bulk.len() >= bulk_count {
+                         match coll.insert_many(bulk.clone(), insert_many_options.clone()).await {
+                             Ok(_) => {
+                                 log::debug!("Bulk inserted {} docs", bulk_count);
+                             }
+                             Err(e) => {
+                                 log::debug!("Got error with insertMany: {}", e);
+                             }
                        }
+                         bulk.clear();
+                         self.counter.incr(&self.db, collection, bulk_count as f64, start);
+                     } else {
+                         continue
                    }
-                     self.counter.incr(&self.db, collection);
+                     // if bulk.push(d).await {
+                     //     log::debug!("Bulk inserting {} docs", bulk_count);
+                     //     let values = bulk.get().await;
+                     //     match coll.insert_many(values, insert_many_options.clone()).await {
+                     //         Ok(_) => {
+                     //             log::debug!("Bulk inserted {} docs", bulk_count);
+                     //         }
+                     //         Err(e) => {
+                     //             log::debug!("Got error with insertMany: {}", e);
+                     //         }
+                     //     }
+                     //     bulk.clear().await;
+                     //     self.counter.incr(&self.db, collection, bulk_count as f64, start);
+                     // } else {
+                     //     log::debug!("inserted doc: {}/{}", bulk.len().await, bulk_count);
+                     //     continue
+                     // }
                }
                Err(e) => {
                    log::error!("Caught error getting next doc, skipping: {}", e);
                    continue;
                }
            };
        }
-         println!("Completed {}.{}", self.db, collection);
+         log::info!("Completed {}.{}", self.db, collection);
        Ok(())
    }

@@ -125,21 +207,67 @@ impl Counter {
        Counter {
            count: 0.0,
            marker: 0.0,
-             total: 0.0
+             total: 0.0,
        }
    }

    pub fn total(&mut self, total: f64) {
        self.total = total;
    }

-     pub fn incr(&mut self, db: &str, collection: &str) {
-         self.count += 1f64;
+     pub fn incr(&mut self, db: &str, collection: &str, count: f64, start: i64) {
+         self.count += count;
        let percent = self.count / self.total * 100.0;

+         // Get time elapsed
+         let now = Utc::now().timestamp();
+         let delta = now - start;
+
+         // Get insert rate
+         let rate = self.count / delta as f64;
+
        if percent - self.marker > 1.0 {
-             println!("Copying {}.{}: {:.2}%", db, collection, percent);
+             log::info!("{}.{}: {:.2}%, {:.2}/s, {}/{}", db, collection, percent, rate, self.count, self.total);
            self.marker += 1f64;
        };
    }
}
+
+ #[derive(Debug, Clone)]
+ pub struct Bulk {
+     pub inner: Arc<Mutex<Vec<Document>>>
+ }
+
+ impl Bulk {
+     pub fn new(size: usize) -> Bulk {
+         Bulk {
+             inner: Arc::new(Mutex::new(Vec::with_capacity(size)))
+         }
+     }
+
+     pub async fn push(&mut self, doc: Document) -> bool {
+         let mut me = self.inner.lock().await;
+         me.push(doc);
+
+         if me.len() >= me.capacity() {
+             return true
+         } else {
+             return false
+         }
+     }
+
+     pub async fn len(&self) -> usize {
+         let me = self.inner.lock().await;
+         me.len()
+     }
+
+     pub async fn get(&self) -> Vec<Document> {
+         let me = self.inner.lock().await;
+         me.to_vec()
+     }
+
+     pub async fn clear(&self) {
+         let mut me = self.inner.lock().await;
+         me.clear()
+     }
+ }
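
For orientation, here is a minimal sketch of how the pieces added in this commit fit together, reusing the imports and the `DB` / `BoxResult` types from this file. The connection strings, database and collection names, and the batch size of 1000 are illustrative assumptions, not values from the commit; only `DB::init`, `find`, and `bulk_insert_cursor` come from the code above.

```rust
// Hypothetical driver: stream one collection from a source deployment to a
// destination deployment using the new bulk path. It would be called from an
// async runtime such as a #[tokio::main] entry point.
async fn copy_collection() -> BoxResult<()> {
    // Illustrative connection strings and names
    let mut source = DB::init("mongodb://source-host:27017", "mydb").await?;
    let mut destination = DB::init("mongodb://destination-host:27017", "mydb").await?;

    // Read with a cursor batch size that matches the insert batch size
    let bulk_size: u64 = 1000;
    let (cursor, total) = source.find("mycollection", doc! {}, Some(bulk_size)).await?;

    // Write in unordered batches of the same size
    destination
        .bulk_insert_cursor("mycollection", cursor, total, bulk_size as usize)
        .await?;

    Ok(())
}
```

Matching the cursor's `batch_size` to the insert batch keeps reads and writes roughly aligned, and `ordered(false)` lets the remaining documents in a batch be inserted even if one of them fails.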