// tor_multipath_voip/bench/bench1/parse.js

const fs = require('fs')
const Promise = require('bluebird')
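// For a single RTT threshold, return, per circuit, the length (in consecutive
// samples) of every run of RTTs strictly above the threshold.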
const threshold = (rttl, thrshld) =>
  rttl.map(circuit => {
    const too_high_dur = []
    let already_too_high = false
    circuit.forEach(rtt => {
      const too_high = rtt > thrshld
      if (too_high) {
        if (!already_too_high) {
          already_too_high = true
          too_high_dur.push(0)
        }
        too_high_dur[too_high_dur.length - 1]++
      } else {
        already_too_high = false
      }
    })
    return too_high_dur
  })
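// For each RTT threshold (100/250/500/1000 ms), bucket the above-threshold run
// lengths into duration classes and return [threshold, class, fraction] rows;
// the class labels suggest one sample per second.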
const multi_threshold = rttl => {
  const lim = [0, 2, 20, 600].reverse()
  const limtxt = ["0-1 s", "2-19 s", "20-599 s", "600-inf s"]
  const thresholdv = [100, 250, 500, 1000]
  const thresholdtxt = thresholdv.map(t => `${t}ms`)
  const answr = thresholdv
    .map((th, idx, thtable) =>
      threshold(rttl, th)
        .reduce((acc, e) => acc.concat(e), [])
        .reduce((acc, e) => {
          // lim is sorted descending, so the first bound that e reaches picks
          // the right duration class
          lim.some((l, i) => {
            if (e >= l) {
              acc[i]++
              return true
            }
            return false
          })
          return acc
        }, [0, 0, 0, 0])
        .reverse()
        .map((v, i, t) => v / t.reduce((a, b) => a + b)) // normalize counts to fractions
        .map((v, i) => [thresholdtxt[idx], limtxt[i], v])
    )
    .reduce((acc, e) => acc.concat(e), [])
  return answr
}
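// Turn an array of objects into one object of arrays, keyed by the fields of
// the first element.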
const flip = data => {
  const res = {}
  Object
    .keys(data[0])
    .forEach(k => {
      res[k] = data.map(e => e[k])
    })
  return res
}
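// Pick min/5th/25th/median/75th/95th/max out of an already-sorted array.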
const split_in_perc = ar => new Object({
  min: ar[0],
  _5th: ar[Math.floor((ar.length - 1) * 0.05)],
  _25th: ar[Math.floor((ar.length - 1) * 0.25)],
  med: ar[Math.floor((ar.length - 1) * 0.50)],
  _75th: ar[Math.floor((ar.length - 1) * 0.75)],
  _95th: ar[Math.floor((ar.length - 1) * 0.95)],
  max: ar[ar.length - 1]
})
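// Per circuit, sort the RTTs (in place) and emit one [hs, percentile, value]
// row per percentile.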
const comp_perc = (hsl, rttl) =>
  rttl.map((rtt, idx) => {
    rtt.sort((a, b) => a - b)
    const hs = hsl[idx]
    const sprtt = split_in_perc(rtt)
    return [
      [hs, "min", sprtt.min],
      [hs, "5th", sprtt._5th],
      [hs, "25th", sprtt._25th],
      [hs, "med", sprtt.med],
      [hs, "75th", sprtt._75th],
      [hs, "95th", sprtt._95th],
      [hs, "max", sprtt.max]
    ]
  })
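// Per circuit, compute the percentile spreads max-min, 95th-5th and 75th-25th.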
const comp_delta = (hsl, rttl) =>
  comp_perc(hsl, rttl)
    .map(percentiles => [
      [percentiles[0][0], "max - min", percentiles[6][2] - percentiles[0][2]],
      [percentiles[0][0], "95th - 5th", percentiles[5][2] - percentiles[1][2]],
      [percentiles[0][0], "75th - 25th", percentiles[4][2] - percentiles[2][2]]
    ])
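// Draw n circuits uniformly at random, with replacement.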
const pick_circuits = (rtt, n) =>
  Array(n)
    .fill(0)
    .map(v => Math.floor(Math.random() * rtt.length))
    .map(v => rtt[v])
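// Transpose a group of circuits: for each sample index, collect the RTTs of
// every circuit that is long enough; the accumulator is seeded with nulls
// sized to the longest circuit.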
const prepare_circuit_merge = group =>
  group
    .reduce((acc, circuit) =>
      circuit.reduce((acc2, v, idx) => {
        if (!acc2[idx]) acc2[idx] = []
        acc2[idx].push(v)
        return acc2
      }, acc)
    , Array(Math.max.apply(null, group.map(v => v.length))).fill(null))
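// Merge a group of circuits into one synthetic circuit: per sample index, keep
// the sorted RTT at 0-based position k, i.e. the time by which k+1 copies have
// arrived (generate_new_rtt calls this with k - 1).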
const new_circuits = (group, k) =>
  prepare_circuit_merge(group)
    .map(values => {
      const c = values
        .sort((a, b) => a - b)
        .filter(a => a != null)
      return c[Math.min(c.length - 1, k)]
    })
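// Packet-level merge: simulate sending k data packets `delay` ms apart, one
// per circuit, plus redundancy packets on the remaining circuits (idx: null);
// a chunk is decodable once any k of its packets have arrived.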
const new_circuits_with_pkt = (group, k, delay) =>
  prepare_circuit_merge(group)
    .filter(v => v.length >= k)
    .reduce((acc, v, idx) => {
      if (idx % k == 0) acc.push([])
      const pos = idx % k
      acc[acc.length - 1].push(new Object({ arriv: pos * delay + v[pos], idx: pos }))
      // Once the chunk of k data packets is complete, append the redundancy
      // packets carried by the circuits beyond the first k
      if ((idx + 1) % k == 0)
        acc[acc.length - 1] = acc[acc.length - 1].concat(
          v.slice(k).map(rtt => new Object({ arriv: pos * delay + rtt, idx: null })))
      return acc
    }, [])
    .filter(v => v.length >= k)
    .map(v =>
      v
        .sort((a, b) => a.arriv - b.arriv) // Sort packets by arrival time, accounting for their staggered emission
        .slice(0, k) // Keep only the k first received packets, the only ones needed to decode
        .reduce((acc, v, idx, tab) => {
          if (acc.length == 0) acc = Array(tab.length).fill(tab[tab.length - 1].arriv) // init accumulator with the last significant received value
          if (v.idx != null) acc[v.idx] = v.arriv // idx 0 is a valid data packet, so test against null explicitly
          return acc
        }, []) // A packet with an idx is a clear packet and can be delivered now; we still need k packets to decode the rest
        .reduce((acc, v) => {
          if (acc.length > 0) acc.push(Math.max(acc[acc.length - 1], v))
          else acc.push(v)
          return acc
        }, []) // Implement head-of-line blocking, as packets must be delivered in order
        .map((v, idx) => v - idx * delay) // Reset time relative to each packet's emission, not the first packet's
    )
    .reduce((acc, v) => acc.concat(v), []) // flatten data
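// Bucket every measurement by the wall-clock second of its start timestamp,
// remembering the earliest second seen.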
const latency_per_real_time_sec = (aggreg, include_onion) =>
  aggreg
    .reduce((acc2, v) =>
      v.reduce((acc, [onion, end, start, rtt]) => {
        const in_sec = Math.floor(start / 1000)
        if (acc.start > in_sec) acc.start = in_sec
        if (!acc.latencies[in_sec]) acc.latencies[in_sec] = []
        acc.latencies[in_sec].push(include_onion ? [onion, rtt] : rtt)
        return acc
      }, acc2), { start: Math.floor(Date.now() / 1000), latencies: [] })
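// Per wall-clock second, emit [time, sample count, percentile, value] rows.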
const latency_histo = lat_per_sec =>
  lat_per_sec
    .latencies
    .slice(lat_per_sec.start)
    //.forEach((v, idx) => console.log(idx + lat_per_sec.start, v))
    .map((v, idx) => new Object({
      time: idx + lat_per_sec.start,
      count: v.length,
      avg: split_in_perc(v.sort((a, b) => a - b))
    }))
    .map(v => Object.keys(v.avg).map(p => [v.time, v.count, p, v.avg[p]]))
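// Map each onion address to its median RTT, read from comp_perc rows.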
const med_by_circuit = perc => {
  const res = {}
  perc.forEach(([min, fifth, quart, [med_hs, med_name, med_rtt], ...rest]) => res[med_hs] = med_rtt)
  return res
}
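// Per wall-clock second, fraction of samples whose RTT exceeds mult times
// their own circuit's median.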
const med_threshold = (lat_per_sec, med_by_circ, mult) =>
  lat_per_sec
    .latencies
    .slice(lat_per_sec.start)
    .map((v, idx) => new Object({
      time: idx + lat_per_sec.start,
      count: v.length,
      above_th: v.reduce((acc, [onion, rtt]) => {
        return rtt > mult * med_by_circ[onion] ? acc + 1 : acc
      }, 0) / v.length
    }))
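// For each of the 600 sample positions, RTT percentiles across all circuits
// that have a value at that position.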
const circuit_over_time = rttl =>
  Array(600)
    .fill(null)
    .map((_v, idx) =>
      rttl
        .map(circuit => circuit[idx] ? circuit[idx] : null)
        .filter(v => v != null)
        .sort((a, b) => a - b)
    )
    .map(v => split_in_perc(v))
    .map(v => Object.keys(v).map(p => [p, v[p]]))
const passe_bas = (ar, th) => ar.filter(circ => circ[0] <= th) // low-pass ("passe bas"): keep circuits whose first RTT is at most th
const order_circuit_time = aggreg => aggreg.sort((c1, c2) => c1[0][2] - c2[0][2]) // order runs by the start timestamp of their first measurement
const pick_circuits_ord = (aggreg_ord, n, max_difference) => {
  // Unfinished: picks a random pivot but never selects or returns the n circuits
  const pivot = Math.floor(Math.random() * aggreg_ord.length)
}
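// Entry point: read the measurement files in the directory given as argv[2],
// keep "onion" result files from all but the final 30 minutes of the run,
// parse them in parallel, then print the report selected in .then() below.
// Usage (assumed): node parse.js <results_dir> > report.csv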
fs.readdir(process.argv[2], (err, items) => {
  if (err) {
    console.error(err)
    return
  }
  const started = new Date(parseInt(items[0].match(/^\d+/)[0]))
  const stopped = new Date(parseInt(items[items.length - 1].match(/^\d+/)[0]))
  stopped.setMinutes(stopped.getMinutes() - 30) // drop the last 30 minutes of the run
  const cut = stopped.getTime()
  const result = items.filter(e => {
    if (!e.match(/onion/)) return false
    const current = parseInt(e.match(/^\d+/)[0])
    return current <= cut
  })
  console.error(`${result.length} accepted results`)
  console.error(`Started at ${started.toString()} and stopped at ${stopped.toString()}`)
  Promise.map(result, f => new Promise((resolve, reject) => {
    fs.readFile(`${process.argv[2]}/${f}`, 'utf-8', (err, data) => {
      if (err) {
        reject(err)
        return
      }
      const aggregation = []
      const rtt = []
      let res
      const reg = /delta_timestamp, (\d+), ([\d\w.]+), (\d+), (\d+), (\d+)\n/g
      while (res = reg.exec(data)) {
        aggregation.push([
          res[2], // hs
          parseInt(res[4]), // end
          parseInt(res[5]), // start
          parseInt(res[4]) - parseInt(res[5]) // rtt
        ])
        rtt.push(parseInt(res[4]) - parseInt(res[5]))
      }
      const same_buffer = []
      const reg2 = /#info aggregation: (\d+) was aggregated with (\d+) for ([\w\d.]+)/g
      while (res = reg2.exec(data)) {
        same_buffer.push({
          current: parseInt(res[1]),
          orig: parseInt(res[2]),
          hs: res[3]
        })
      }
      resolve({
        aggreg: aggregation,
        rtt: rtt,
        hs: aggregation[0][0],
        failed_at: rtt.length,
        same_buffer: same_buffer
      })
    })
  }), { concurrency: 512 })
    .then(data => {
      // Assigns to implicit globals so the out_* helpers below can reuse them
      ({ aggreg, rtt, hs, failed_at, same_buffer } = flip(data))
      //const [new_rtt, new_hs] = generate_new_rtt(7000, 1, 2, 500, 0, rtt)
      //out_percentiles(new_hs, new_rtt)
      out_ordered_median(hs, rtt, 'med')
      //out_raw()
    })
    .catch(console.error)
})
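// Draw n elements without replacement from a deep copy of a.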
const sample = (a, n) => ((w) =>
  Array(n)
    .fill(null)
    .map(v => w.splice(Math.floor(Math.random() * w.length), 1)[0])
)(JSON.parse(JSON.stringify(a)))
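// Output helpers: each out_* function prints one CSV report to stdout; the one
// that actually runs is selected in the .then() handler above.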
const out_lat_real_time = aggreg => {
  console.log("time,count,type,rtt")
  latency_histo(latency_per_real_time_sec(aggreg))
    .reduce((acc, v) => acc.concat(v), [])
    .forEach(a => console.log(`${a[0]},${a[1]},${a[2]},${a[3]}`))
}
const out_circ_over_measurement = rtt => {
  console.log("type,rtt,time")
  circuit_over_time(rtt)
    .reduce((acc, v, idx) => acc.concat(v.map(w => w.concat([idx]))), [])
    .forEach(v => console.log(`${v[0]}, ${v[1]}, ${v[2]}`))
}
const out_above_median = aggreg => {
  const lprlt = latency_per_real_time_sec(aggreg, true)
  const mbc = med_by_circuit(comp_perc(hs, rtt)) // uses the globals set in the .then() handler
  console.log("time,count,above_th")
  med_threshold(lprlt, mbc, 2)
    .forEach(a => console.log(`${a.time},${a.count},${a.above_th}`))
}
const out_raw = aggreg => {
  console.log("onion,end,start,delta")
  aggreg
    .reduce((acc, val) => acc.concat(val), [])
    .forEach(e => console.log(`${e[0]}, ${e[1]}, ${e[2]}, ${e[3]}`))
}
const out_percentiles = (hs, rtt) => {
  console.log("hs,type,delta")
  comp_perc(hs, rtt)
    .reduce((acc, rtt) => acc.concat(rtt), [])
    .forEach(e => console.log(`${e[0]}, ${e[1]}, ${e[2]}`))
}
const out_delta_percentiles = (hs, rtt) => {
  console.log("hs,type,delta")
  comp_delta(hs, rtt)
    .reduce((acc, delta) => acc.concat(delta), [])
    .forEach(e => console.log(`${e[0]}, ${e[1]}, ${e[2]}`))
}
const out_failed_at = failed_at => {
  console.log("failed_at")
  failed_at.forEach(e => console.log(e))
}
const out_latency_spikes = rtt => {
  console.log("threshold,group,count")
  multi_threshold(rtt).forEach(l => console.log(`${l[0]},${l[1]},${l[2]}`))
}
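// Build `generate` synthetic multipath circuits: low-pass filter the measured
// circuits on their first RTT ("seuil" is French for threshold), draw n of
// them per group, and merge each group with new_circuits.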
const generate_new_rtt = (generate, k, n, seuil, delay, rtt) => {
  const pre_filter = passe_bas(rtt, seuil)
  const new_rtt = Array(generate)
    .fill(null)
    .map(v => pick_circuits(pre_filter, n))
    .map(v => new_circuits(v, k - 1))
    //.map(v => new_circuits_with_pkt(v, k, delay)) //20ms delay
    .filter(v => {
      // keep only merged circuits that still span (almost) the full 600 samples
      if (v.length <= 597 || v.length > 600) {
        console.error(`Wrong final size ${v.length}`)
        return false
      }
      return true
    })
  const new_hs = Array(generate).fill(null).map((v, idx) => `${idx}.onion`)
  return [new_rtt, new_hs]
}
const out_pike_corel = aggreg => {
  const n = 3
  const max_difference = 10 * 1000 // 10s in ms
  const aggreg_ord = order_circuit_time(aggreg)
  // NOTE: shift_to_match is not defined in this file and pick_circuits_ord is
  // unfinished, so this report is not usable yet
  shift_to_match(pick_circuits_ord(aggreg_ord, n, max_difference))
}
const out_ecdf_first_rtt = rtt => rtt.map(v => v[0]).forEach(v => console.log(v))
const out_low_pass_pct = (rtt, max_rtt) => console.log(`${max_rtt}ms keeps`, passe_bas(rtt, max_rtt).length / rtt.length)
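// Head-of-line blocking report: rebuild runs of consecutively aggregated
// packets from the "#info aggregation" entries and print, per measurement
// file, the total number of aggregated packets.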
const out_hol = same_buffer => {
  const sbres = same_buffer
    .map(sb =>
      sb.reduce((acc, v) => {
        if (acc.hs == null) acc.hs = v.hs
        if (v.orig != acc.orig && v.orig != acc.last - 1) {
          acc.orig = v.orig
          acc.l.push(0)
        }
        if (v.orig == acc.last - 1) acc.orig = v.orig
        acc.last = v.current
        acc.l[acc.l.length - 1] += 1
        return acc
      }, { l: [], last: null, orig: null, hs: null })
    )
    .reduce((acc, sb) => acc.concat(sb), [])
    .map(v => v.l)
    //.filter(v => v.length > 0)
  //console.log(sbres.length)
  //const hol = sbres.reduce((acc, sb) => acc.concat(sb), []).sort((a, b) => b - a))
  console.log("hol")
  sbres.map(v => v.reduce((a, b) => a + b, 0)).forEach(v => console.log(v))
}
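// Print percentile rows for a ~10% random sample of circuits, ranked by the
// percentile named by percname.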
const out_ordered_median = (hs, rtt, percname) => {
  const pos = ["min", "_5th", "_25th", "med", "_75th", "_95th", "max"].indexOf(percname)
  console.log("hs,type,rtt,rank")
  comp_perc(hs, rtt)
    .filter(v => Math.random() < 0.1) // keep a ~10% random sample of circuits
    .sort((a, b) => a[pos][2] - b[pos][2]) // Sort by [percname][rtt]
    .map((circuit, idx) => circuit.map(entry => entry.concat([idx]))) // Add rank to each value
    .reduce((acc, v) => acc.concat(v), []) // Flatten to a structure matching the output CSV
    .forEach(v => console.log(`${v[0]}, ${v[1]}, ${v[2]}, ${v[3]}`)) // Output CSV line by line
}
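// Label each circuit "good" when every RTT after the first split_at samples
// stays at or below too_long, and emit summary statistics of the first
// split_at samples as CSV features for predicting that label.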
const predict = () => {
  const split_at = 60
  const too_long = 3000
  const predictions = rtt
    .map(circuit => new Object({ seen: circuit.slice(0, split_at), to_predict: circuit.slice(split_at) }))
    .filter(({ seen, to_predict }) => Math.max.apply(null, seen) <= too_long) // skip circuits already bad in the observed window
    .map(({ seen, to_predict }) => new Object({
      seen: seen,
      computed: Object.assign(split_in_perc(seen.sort((a, b) => a - b)), { avg: seen.reduce((a, b) => a + b) / seen.length }),
      good: to_predict.every(v => v <= too_long)
      //good: Math.max.apply(null, to_predict)
    }))
    .map(v => {
      v.computed.delta_max = v.computed.max - v.computed.min
      v.computed.delta_5 = v.computed._95th - v.computed._5th
      v.computed.delta_25 = v.computed._75th - v.computed._25th
      v.computed.stddev =
        Math.sqrt(
          v.seen
            .map(rtt => Math.pow(rtt - v.computed.avg, 2))
            .reduce((a, b) => a + b)
          / v.seen.length)
      return v
    })
  const predict_keys = ['good'].concat(Object.keys(predictions[0].computed))
  console.log(predict_keys.join())
  predictions
    .forEach(({ seen, computed, good }) =>
      console.log(([good].concat(predict_keys.slice(1).map(k => computed[k])).join())))
  /*
  console.log("good,rtt0,rtt1,rtt2,rtt3,rtt4,rtt5,rtt6,rtt7,rtt8,rtt9")
  rtt
    .filter(v => v.length == 600)
    .map(v => [v.some(r => r > 2000) ? 'f' : 't'].concat(v.slice(0, 10)))
    .map(v => v.join())
    .forEach(v => console.log(v))
  */
  /*
  const new_sb = same_buffer
    .filter((v, idx) => rtt[idx][0] <= 500)
  out_hol(new_sb)
  */
}