ChoChoX/obsidian

Fork 0

Files

ChoChoX 224c3dc574 vault backup: 2026-06-13 23:46:22

2026-06-13 23:46:22 +08:00

10 KiB

Raw Blame History

实验01 Linux I/O 编程

实验目的

练习 UNIX I/O 函数（open、close、read、write、lseek）的使用
掌握标准 I/O 函数（fgets、fread、fwrite）的操作方式
建立 API 开销的概念，理解系统调用与库函数的性能差异
熟悉结构体的二进制 I/O 读写方法
综合运用文件 I/O 完成文本处理任务

涉及知识点

文件描述符与 open/close/read/write 系统调用
标准 I/O：fopen/fclose/fgets/fprintf/fread/fwrite
文件打开模式：O_RDONLY、O_WRONLY、O_CREAT、O_TRUNC、O_APPEND
结构体与文件 I/O 结合（二进制序列化）
gettimeofday 高精度计时
字符串处理：strtok、strcmp、strstr、sscanf、%[^:]
排序算法（词频统计中的字典序排列）

任务一：task41.c —— 学生信息文件字段处理

任务要求

创建文件 student.txt，写入若干学生记录，每行格式为 姓名:学号:学院:年龄:性别
从 student.txt 中查找所有属于"计算机与网络安全学院"的记录
将找到的记录字段顺序调整为 学号:姓名:性别:年龄:学院
将调整后的记录写入 csStudent.txt

关键代码提示

#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Write all `len` bytes of `buf` to `fd`, retrying after short writes.
 * Returns 0 on success and -1 as soon as write() fails to make progress.
 */
static int write_all(int fd, const char *buf, size_t len) {
    while (len > 0) {
        ssize_t n = write(fd, buf, len);
        if (n <= 0)          /* error, or no progress: give up instead of spinning */
            return -1;
        buf += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Task 1: create student.txt with sample records of the form
 * name:id:college:age:gender, then copy every record whose college field is
 * the target college into csStudent.txt, reordered as
 * id:name:gender:age:college.
 *
 * Returns 0 on success; exits/returns non-zero when a file cannot be
 * created, read, or written.
 */
int main() {
    static const char target_college[] = "计算机与网络安全学院";

    // ---- Step 1: create student.txt and write the sample records ----
    int fd = open("student.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd < 0) { perror("open student.txt"); exit(1); }

    const char *records[] = {
        "张三:2023001:计算机与网络安全学院:20:男\n",
        "李四:2023002:电子信息学院:21:女\n",
        "王五:2023003:计算机与网络安全学院:22:男\n",
        "赵六:2023004:数学学院:19:女\n",
        "钱七:2023005:计算机与网络安全学院:20:男\n",
    };
    const size_t record_count = sizeof records / sizeof records[0];

    for (size_t i = 0; i < record_count; i++) {
        if (write_all(fd, records[i], strlen(records[i])) < 0) {
            perror("write student.txt");
            close(fd);
            exit(1);
        }
    }
    if (close(fd) < 0) { perror("close student.txt"); exit(1); }

    // ---- Step 2: read back, filter by college, reorder the fields ----
    FILE *fin = fopen("student.txt", "r");
    if (fin == NULL) { perror("fopen student.txt"); exit(1); }

    FILE *fout = fopen("csStudent.txt", "w");
    if (fout == NULL) {
        perror("fopen csStudent.txt");
        fclose(fin);
        exit(1);
    }

    char line[256];
    while (fgets(line, sizeof(line), fin) != NULL) {
        char name[64], id[64], college[64], age[16], gender[16];

        // Field widths are one less than each buffer so a malformed or
        // over-long field cannot overflow it; lines that do not yield all
        // five fields are skipped.
        if (sscanf(line, "%63[^:]:%63[^:]:%63[^:]:%15[^:]:%15s",
                   name, id, college, age, gender) != 5)
            continue;

        // Compare the college field itself: a substring search over the
        // whole line would also match the text inside another field.
        if (strcmp(college, target_college) != 0)
            continue;

        // Reordered output: id:name:gender:age:college
        fprintf(fout, "%s:%s:%s:%s:%s\n", id, name, gender, age, college);
    }

    int status = 0;
    if (ferror(fin)) {
        perror("read student.txt");
        status = 1;
    }
    fclose(fin);

    // fclose flushes the stdio buffer, so this is where a write error shows.
    if (fclose(fout) != 0) {
        perror("close csStudent.txt");
        status = 1;
    }
    return status;
}

常见问题

问题	原因	解决方法
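`write` 后文件内容为空	`open` 失败或 `write` 出错而未检查返回值；`write` 写入的数据进入内核后立即可见，与是否 `close` 无关（只有 `fwrite`/`fprintf` 才会把数据暂存在用户态缓冲区）	检查 `open`/`write` 的返回值；使用标准 I/O 时写完后 `fflush` 或 `fclose`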
读取中文出现乱码	编码不匹配	确保源文件为 UTF-8 编码，终端 locale 一致
`strtok` 分割结果不对	行末换行符干扰	分割前先去除 `\n`
`sscanf` 读取不完整	格式字符串匹配错误	使用 `%[^:]` 匹配非冒号字符序列

任务二：task42.c —— 结构体二进制文件读写

任务要求

从键盘读入 5 个学生的信息（学号、姓名、语文、数学、英语成绩），存入结构体数组
将结构体数组以二进制方式写入文件 score.dat（使用 write 写入原始字节）
从文件中读取第 1、3、5 条记录并显示

关键代码提示

#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

/*
 * One student record. The program writes and reads this struct as raw bytes
 * (sizeof(Student) per record), so the field order and sizes define the
 * on-disk format of score.dat.
 */
typedef struct {
    int   id;        /* student number */
    char  name[32];  /* name, stored as a NUL-terminated string */
    float chinese;   /* Chinese score */
    float math;      /* math score */
    float english;   /* English score */
} Student;

/*
 * Read exactly one Student record from `fd` into `*out`.
 * Returns 0 on success; prints a diagnostic and returns -1 on a read error
 * or when fewer than sizeof(Student) bytes are available.
 */
static int read_record(int fd, Student *out) {
    ssize_t got = read(fd, out, sizeof *out);
    if (got == (ssize_t)sizeof *out)
        return 0;
    if (got < 0)
        perror("read score.dat");
    else
        fprintf(stderr, "score.dat: incomplete record\n");
    return -1;
}

/*
 * Task 2: read five students from standard input, store them in score.dat
 * as raw struct bytes, then print records 1, 3 and 5 and finally re-read
 * record 3 via lseek.
 *
 * Returns 0 on success, 1 on invalid input or any I/O failure.
 */
int main() {
    enum { STUDENT_COUNT = 5 };

    // Zero-initialise so unused name bytes written to the file are defined.
    Student stu[STUDENT_COUNT] = {0};

    // Read the records from the keyboard.
    for (int i = 0; i < STUDENT_COUNT; i++) {
        printf("请输入第%d个学生(学号 姓名 语文 数学 英语): ", i + 1);
        // %31s leaves room for the terminator in name[32].
        if (scanf("%d %31s %f %f %f", &stu[i].id, stu[i].name,
                  &stu[i].chinese, &stu[i].math, &stu[i].english) != 5) {
            fprintf(stderr, "输入无效：需要 学号 姓名 语文 数学 英语\n");
            return 1;
        }
    }

    // Binary write of the whole array.
    int fd = open("score.dat", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd < 0) { perror("open score.dat"); return 1; }
    if (write(fd, stu, sizeof stu) != (ssize_t)sizeof stu) {
        perror("write score.dat");
        close(fd);
        return 1;
    }
    if (close(fd) < 0) { perror("close score.dat"); return 1; }

    // Read sequentially and show records 1, 3, 5 (indices 0, 2, 4).
    fd = open("score.dat", O_RDONLY);
    if (fd < 0) { perror("open score.dat"); return 1; }

    Student temp;
    for (int i = 0; i < STUDENT_COUNT; i++) {
        if (read_record(fd, &temp) < 0) {
            close(fd);
            return 1;
        }
        if (i == 0 || i == 2 || i == 4) {
            printf("学号:%d 姓名:%s 语文:%.1f 数学:%.1f 英语:%.1f\n",
                   temp.id, temp.name, temp.chinese, temp.math, temp.english);
        }
    }

    // Alternative: seek straight to record 3 (offset = record size * 2).
    if (lseek(fd, (off_t)sizeof(Student) * 2, SEEK_SET) == (off_t)-1) {
        perror("lseek score.dat");
        close(fd);
        return 1;
    }
    if (read_record(fd, &temp) < 0) {
        close(fd);
        return 1;
    }
    printf("第3条: %s\n", temp.name);

    close(fd);
    return 0;
}

常见问题

问题	原因	解决方法
读出的数值不对	读写时使用的记录大小不一致：结构体存在内存对齐（padding），`sizeof(Student)` 可能大于各字段大小之和	读、写以及 `lseek` 偏移一律使用 `sizeof(Student)`，不要手工累加各字段大小
`lseek` 定位不准	偏移量计算错误	偏移量 = `sizeof(Student) * (n - 1)`
中文姓名存储异常	`char name[32]` 对 UTF-8 中文不够	增大缓冲区（一个汉字占 3 字节）

任务三：task43.c —— API 执行时间测量（选做）

任务要求

分别测量 read/write 和 fread/fwrite 在不同数据量下的执行时间
对比系统调用与库函数的性能差异
绘制或输出性能对比表

关键代码提示

#include <sys/time.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Elapsed time from *start to *end, expressed in microseconds.
 * The seconds difference is scaled to microseconds and the microsecond
 * fields' difference (which may be negative) is added on top.
 */
long time_diff(struct timeval *start, struct timeval *end) {
    time_t whole_seconds = end->tv_sec - start->tv_sec;
    long   extra_micros  = end->tv_usec - start->tv_usec;

    return whole_seconds * 1000000L + extra_micros;
}

/*
 * Task 3: time N one-byte write() calls against N one-byte fwrite() calls
 * and print both durations in microseconds.
 *
 * Exits with status 1 if a file cannot be opened or a write fails, so a
 * failed run is never reported as a (meaninglessly fast) measurement.
 */
int main() {
    struct timeval start, end;
    int N = 1000000;          // number of one-byte writes per measurement
    char buf[1] = { 'x' };    // defined payload byte (was uninitialised)

    // Measure write(): one system call per byte.
    int fd = open("test.dat", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd < 0) { perror("open test.dat"); exit(1); }

    gettimeofday(&start, NULL);
    for (int i = 0; i < N; i++) {
        if (write(fd, buf, 1) != 1) {
            perror("write test.dat");
            close(fd);
            exit(1);
        }
    }
    gettimeofday(&end, NULL);
    close(fd);
    printf("write 逐字节: %ld 微秒\n", time_diff(&start, &end));

    // Measure fwrite(): one library call per byte, buffered in user space.
    FILE *fp = fopen("test2.dat", "w");
    if (fp == NULL) { perror("fopen test2.dat"); exit(1); }

    gettimeofday(&start, NULL);
    for (int i = 0; i < N; i++) {
        if (fwrite(buf, 1, 1, fp) != 1) {
            perror("fwrite test2.dat");
            fclose(fp);
            exit(1);
        }
    }
    // Flush inside the timed region so both measurements cover handing all
    // N bytes to the kernel, not N bytes minus whatever is still buffered.
    if (fflush(fp) != 0) {
        perror("fflush test2.dat");
        fclose(fp);
        exit(1);
    }
    gettimeofday(&end, NULL);
    fclose(fp);
    printf("fwrite 逐字节: %ld 微秒\n", time_diff(&start, &end));

    return 0;
}

测量方案

测量项	操作	说明
`write`	逐字节写 1MB	基准：每次陷入内核
`read`	逐字节读 1MB	基准：每次陷入内核
`fwrite`	逐字节写 1MB	带用户空间缓冲
`fread`	逐字节读 1MB	带用户空间缓冲
`write`	块写入（4KB）	对比块大小影响

常见问题

问题	原因	解决方法
计时结果为 0	操作太快，微秒级精度不够	增加循环次数到百万级
系统调用比库函数慢很多	每次 `read`/`write` 都陷入内核	正常现象，体现用户缓冲的价值
结果波动大	系统调度干扰	多次测量取平均值

任务四：task44.c —— 英文文章词频统计

任务要求

读取一篇英文文章（从文件或标准输入）
统计每个单词出现的次数
输出格式：单词:次数
按字典序排列所有单词
额外输出出现频度最高的 10 个单词

关键代码提示

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#define MAX_WORDS 10000

/* One dictionary entry: a word and the number of times it has been counted. */
typedef struct {
    char word[64];
    int  count;
} WordEntry;

/* Fixed-capacity word table; entries [0, dict_size) are in use. */
WordEntry dict[MAX_WORDS];
int dict_size = 0;

/*
 * Count one occurrence of `word`.
 *
 * If the word is already in the table its count is incremented; otherwise a
 * new entry with count 1 is appended. Words longer than the entry buffer are
 * keyed by their first (sizeof word - 1) bytes, for both lookup and storage,
 * so an over-long word can never overflow the buffer and always maps to the
 * same entry.
 *
 * Returns the entry's index, or -1 if `word` is NULL or the table already
 * holds MAX_WORDS distinct words (in which case nothing is modified).
 */
int find_or_insert(const char *word) {
    /* Longest key that fits in WordEntry.word, excluding the terminator. */
    const size_t key_len = sizeof dict[0].word - 1;

    if (word == NULL)
        return -1;

    /* Stored words are at most key_len bytes, so comparing at most key_len
     * bytes is an exact match against the (possibly truncated) key. */
    for (int i = 0; i < dict_size; i++) {
        if (strncmp(dict[i].word, word, key_len) == 0) {
            dict[i].count++;
            return i;
        }
    }

    if (dict_size >= MAX_WORDS)
        return -1;   /* table full: refuse instead of writing past the array */

    size_t len = strlen(word);
    if (len > key_len)
        len = key_len;
    memcpy(dict[dict_size].word, word, len);
    dict[dict_size].word[len] = '\0';
    dict[dict_size].count = 1;
    return dict_size++;
}

// qsort 比较函数：字典序
int cmp_alpha(const void *a, const void *b) {
    return strcmp(((WordEntry *)a)->word, ((WordEntry *)b)->word);
}

// qsort 比较函数：频度降序
int cmp_freq(const void *a, const void *b) {
    return ((WordEntry *)b)->count - ((WordEntry *)a)->count;
}

/*
 * Task 4: count word frequencies in article.txt.
 *
 * Each whitespace-separated token is reduced to its alphabetic characters,
 * lower-cased, and counted. The program prints every word as word:count in
 * dictionary order, followed by the ten most frequent words.
 *
 * Returns 0 on success, 1 if article.txt cannot be opened.
 */
int main() {
    enum { TOP_N = 10 };

    FILE *fp = fopen("article.txt", "r");
    if (!fp) { perror("fopen"); return 1; }

    char word[64];
    long uncounted = 0;   // occurrences find_or_insert reported as not stored
    while (fscanf(fp, "%63s", word) == 1) {
        // Strip non-letters and fold to lower case.
        char clean[64];
        int j = 0;
        for (int i = 0; word[i]; i++) {
            // <ctype.h> functions require a value representable as
            // unsigned char; a plain char holding a non-ASCII byte may be
            // negative, which is undefined behaviour.
            unsigned char ch = (unsigned char)word[i];
            if (isalpha(ch))
                clean[j++] = (char)tolower(ch);
        }
        clean[j] = '\0';
        // A negative index means the word was not recorded; tally it
        // rather than dropping it silently.
        if (j > 0 && find_or_insert(clean) < 0)
            uncounted++;
    }
    fclose(fp);

    if (uncounted > 0)
        fprintf(stderr, "warning: %ld word occurrence(s) were not counted\n",
                uncounted);

    // Full listing in dictionary order.
    qsort(dict, dict_size, sizeof(WordEntry), cmp_alpha);
    for (int i = 0; i < dict_size; i++)
        printf("%s:%d\n", dict[i].word, dict[i].count);

    // The TOP_N most frequent words, highest count first.
    qsort(dict, dict_size, sizeof(WordEntry), cmp_freq);
    printf("\n频度最高的10个单词:\n");
    for (int i = 0; i < TOP_N && i < dict_size; i++)
        printf("%s:%d\n", dict[i].word, dict[i].count);

    return 0;
}

注意事项

单词提取时需过滤标点符号（逗号、句号、引号等）
不区分大小写（统一转为小写）
连字符（如 "well-known"）可按需决定是否拆分
文件较大时注意 MAX_WORDS 的上限，可改用动态分配

常见问题

问题	原因	解决方法
单词带着标点	没有清理非字母字符	用 `isalpha` 逐字符过滤（传参前先转换为 `unsigned char`，否则非 ASCII 字节会导致未定义行为）
大小写被当成不同单词	未统一大小写	提取前用 `tolower` 转换
排序结果不对	`qsort` 比较函数写错	注意比较函数的参数类型转换
数组越界	单词数超过 `MAX_WORDS`	动态扩容（`realloc`）或增大数组

实验总结

通过本实验，应掌握以下能力：

熟练使用底层 I/O（open/read/write）和标准 I/O（fopen/fgets/fprintf）
理解文件描述符与 FILE * 的区别
能用结构体进行二进制文件读写
了解系统调用与库函数的性能差异
综合运用字符串处理和文件 I/O 解决实际问题

10 KiB Raw Blame History Unescape Escape

实验01 Linux I/O 编程

实验目的

涉及知识点

任务一：task41.c —— 学生信息文件字段处理

任务要求

关键代码提示

常见问题

任务二：task42.c —— 结构体二进制文件读写

任务要求

关键代码提示

常见问题

任务三：task43.c —— API 执行时间测量（选做）

任务要求

关键代码提示

测量方案

常见问题

任务四：task44.c —— 英文文章词频统计

任务要求

关键代码提示

注意事项

常见问题

实验总结

10 KiB

Raw Blame History