栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 前沿技术 > 大数据 > 大数据系统

大数据算法之实验算法分析一(附代码)

大数据算法之实验算法分析一(附代码)

实验时间:2021年9月30日

目录
  • 实验一:Find a missing value
  • 实验二:Keep a random sample
  • 实验三:Median estimation via Sampling

实验一:Find a missing value


简述算法过程:
A sequence of values a1,…,an,arriving one by one:
If all ai’s are different and have values between 1 and n+1,the missing value=
实验代码:

#include 
#include 
#include
#include 
using namespace std;

class Env
{
public:
    int data[10000];
    int pointer_max = 0;
    int pointer = 0;
    Env()
    {
        // rand_1.csv rand_2.csv rand_3.csv rand_4.csv rand_5.csv  共5个测试用例,请依次测试
        ifstream file("test_data/rand_1.csv", ios::in);     
        if (!file.is_open())
            cout << " 文件打开失败!" << endl;
        string temp;
        while (getline(file, temp))
        {
            data[pointer_max] = atoi(temp.c_str());
            pointer_max++;
        }
        file.close();
    }
    int get_data()
    {   
        if (!done())
            return data[pointer++];
        else
            return -1;
    }
    bool done()
    {
        if (pointer < pointer_max)
            return false;
        else
            return true;
    }
};
// 所有的代码都需要写在 Algo 类内,自行定义其他需要的变量或函数
class Algo
{
public:
    Algo()
    {
        cout << "algo ok" << endl;
    }
    int run(Env env)
    {
        //在此写入你的代码
        int sum=0;
        for(int i=1;i<10001;i++)
        {
            sum=sum+i-env.get_data();
        }   
        //env.get_data() 会返回当前数据流中的数据
        //env.done() 表示数据流是否输出完毕
        return sum; //修改为返回正确的结果
    }
};
int main()
{
    Algo algo;
    Env env;
    time_t start=clock();
    int res=algo.run(env);
    time_t end=clock();
    cout<<"程序结果:"< 

实验结果(含时间、空间、输出结果):
测试样例:




实验二:Keep a random sample


简述算法过程:
Store first a1,…,ak.
When you see ai for i>k,
—Choose an integer j uniformly at random in [1…i].
—if jk,replace the stored value at position j with ai.
The probability of the replacement event is k/i
实验代码:

#include 
#include 
#include 
#include 
#include 
#include 
#include 

using namespace std;
//此处Len即为k
const int FileSize = 5, K = 11;

class Env {
public:
    int data[10000];
    int pointer_max = 0;
    int pointer = 0;
    string s = "0";

    Env(string& str) {
        s = str;
        cout << "文件" << s << endl;
        // rand_1.csv rand_2.csv rand_3.csv rand_4.csv rand_5.csv  共5个测试用例,请依次测试
        ifstream file("C:/Users/30108/Desktop/test_data/rand_" + s + ".csv", ios::in);
        if (!file.is_open())
            cout << " 文件打开失败!" << endl;
        string temp;
        while (getline(file, temp)) {
            data[pointer_max] = atoi(temp.c_str());
            pointer_max++;
        }
        file.close();
    }

    int get_data() {
        if (!done())
            return data[pointer++];
        else
            return -1;
    }
    bool done() {
        if (pointer < pointer_max)
            return false;
        else
            return true;
    }
};

// 所有的代码都需要写在 Algo 类内,自行定义其他需要的变量或函数
class Algo {
public:
    Algo() {
        cout << "algo ok" << endl;
    }

    int run(Env& env) 
    {
        int* a = new int[K + 1]();
        for (size_t i = 1; i != K + 1; i++) {
            a[i] = env.get_data();
        }
        srand(time(NULL));
        for (size_t i = K + 1, temp = env.get_data(), j = 0; temp != -1; i++) 
        {
            j = rand() % i + 1;
            if (j <= K) {
                a[j] = temp;
            }
            temp = env.get_data();
        }
        //排序
        sort(a+ 1, a + K + 1);
        cout << "程序结果:" << endl;
        for (size_t i = 1; i != K + 1; i++) 
        {
            cout << "a"<< i << ":" << a[i] << 't';
            if (i % 6 == 0) 
            {
                cout << endl;
            }
        }
        return 0; 
    }
};

int main() 
{
    Env* env;
    for (int i = 1; i != FileSize + 1; i++)
    {
        string sI = to_string(i);
        env = new Env(sI);
        Algo algo;
        time_t start = clock();
        int res = algo.run(*env);
        time_t end = clock();
        cout << endl;
        cout << "程序耗时: " << double(end - start) / CLOCKS_PER_SEC << " ms" << endl;
        cout << "占用内存:" << sizeof(algo) << " byte" << endl;
        cout << endl;
    }
    system("pause");
    return 0;
}

实验结果(含时间、空间、输出结果):

实验三:Median estimation via Sampling


简述算法过程:
在实验二的基础上求出随机找出11个数的中位数。
实验代码:

#include 
#include 
#include 
#include 
#include 
#include 
#include 

using namespace std;
//此处Len即为k
const int FileSize = 5, K = 11;

class Env {
public:
    int data[10000];
    int pointer_max = 0;
    int pointer = 0;
    string s = "0";

    Env(string& str) {
        s = str;
        cout << "文件" << s << endl;
        // rand_1.csv rand_2.csv rand_3.csv rand_4.csv rand_5.csv  共5个测试用例,请依次测试
        ifstream file("C:/Users/30108/Desktop/test_data/rand_" + s + ".csv", ios::in);
        if (!file.is_open())
            cout << " 文件打开失败!" << endl;
        string temp;
        while (getline(file, temp)) {
            data[pointer_max] = atoi(temp.c_str());
            pointer_max++;
        }
        file.close();
    }

    int get_data() {
        if (!done())
            return data[pointer++];
        else
            return -1;
    }
    bool done() {
        if (pointer < pointer_max)
            return false;
        else
            return true;
    }
};

// 所有的代码都需要写在 Algo 类内,自行定义其他需要的变量或函数
class Algo {
public:
    Algo() {
        cout << "algo ok" << endl;
    }

    int run(Env& env) 
    {
        int* a = new int[K + 1]();
        for (size_t i = 1; i != K + 1; i++) {
            a[i] = env.get_data();
        }
        srand(time(NULL));
        for (size_t i = K + 1, temp = env.get_data(), j = 0; temp != -1; i++) 
        {
            j = rand() % i + 1;
            if (j <= K) {
                a[j] = temp;
            }
            temp = env.get_data();
        }
        //排序
        sort(a+ 1, a + K + 1);
        cout << "程序结果:" << endl;
        for (size_t i = 1; i != K + 1; i++) 
        {
            cout << "a"<< i << ":" << a[i] << 't';
            if (i % 6 == 0) 
                cout << endl;
        }
        //输出中位数
        int median = 0;
        if (K % 2) 
            median = a[K / 2 + 1];
        else
            median = (a[K % 2] + a[K / 2 + 1]) / 2;
        cout << 'n' << "median :  " << median << endl;
        return median; 
    }
};
int main() 
{
    Env* env;
    for (int i = 1; i != FileSize + 1; i++)
    {
        string sI = to_string(i);
        env = new Env(sI);
        Algo algo;
        time_t start = clock();
        int res = algo.run(*env);
        time_t end = clock();
        cout << endl;
        cout << "程序耗时: " << double(end - start) / CLOCKS_PER_SEC << " ms" << endl;
        cout << "占用内存:" << sizeof(algo) << " byte" << endl;
        cout << endl;
    }
    system("pause");
    return 0;
}

实验结果(含时间、空间、输出结果):

转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/342397.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号