# 实验01 Linux I/O 编程
|
|||
|
|
|
|||
|
|
## 实验目的
|
|||
|
|
|
|||
|
|
1. 练习 UNIX I/O 函数(`open`、`close`、`read`、`write`、`lseek`)的使用
|
|||
|
|
2. 掌握标准 I/O 函数(`fgets`、`fread`、`fwrite`)的操作方式
|
|||
|
|
3. 建立 API 开销的概念,理解系统调用与库函数的性能差异
|
|||
|
|
4. 熟悉结构体的二进制 I/O 读写方法
|
|||
|
|
5. 综合运用文件 I/O 完成文本处理任务
|
|||
|
|
|
|||
|
|
## 涉及知识点
|
|||
|
|
|
|||
|
|
- 文件描述符与 `open`/`close`/`read`/`write` 系统调用
|
|||
|
|
- 标准 I/O:`fopen`/`fclose`/`fgets`/`fprintf`/`fread`/`fwrite`
|
|||
|
|
- 文件打开模式:`O_RDONLY`、`O_WRONLY`、`O_CREAT`、`O_TRUNC`、`O_APPEND`
|
|||
|
|
- 结构体与文件 I/O 结合(二进制序列化)
|
|||
|
|
- `gettimeofday` 高精度计时
|
|||
|
|
- 字符串处理:`strtok`、`strcmp`、`strstr`、`sscanf`(含 `%[^:]` 扫描集格式)
|
|||
|
|
- 排序算法(词频统计中的字典序排列)
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
## 任务一:task41.c —— 学生信息文件字段处理
|
|||
|
|
|
|||
|
|
### 任务要求
|
|||
|
|
|
|||
|
|
1. 创建文件 `student.txt`,写入若干学生记录,每行格式为 `姓名:学号:学院:年龄:性别`
|
|||
|
|
2. 从 `student.txt` 中查找所有属于"计算机与网络安全学院"的记录
|
|||
|
|
3. 将找到的记录字段顺序调整为 `学号:姓名:性别:年龄:学院`
|
|||
|
|
4. 将调整后的记录写入 `csStudent.txt`
|
|||
|
|
|
|||
|
|
### 关键代码提示
|
|||
|
|
|
|||
|
|
```c
|
|||
|
|
#include <fcntl.h>
|
|||
|
|
#include <unistd.h>
|
|||
|
|
#include <string.h>
|
|||
|
|
#include <stdio.h>
|
|||
|
|
#include <stdlib.h>
|
|||
|
|
|
|||
|
|
/*
 * task41: create student.txt with sample records, then copy every record
 * whose college field is "计算机与网络安全学院" into csStudent.txt with the
 * fields reordered from  name:id:college:age:gender
 *                    to  id:name:gender:age:college.
 *
 * Returns 0 on success; exits with status 1 on any I/O failure.
 */
int main() {
    // ---- Step 1: create student.txt and write the sample records ----
    int fd = open("student.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd < 0) { perror("open student.txt"); exit(1); }

    const char *records[] = {
        "张三:2023001:计算机与网络安全学院:20:男\n",
        "李四:2023002:电子信息学院:21:女\n",
        "王五:2023003:计算机与网络安全学院:22:男\n",
        "赵六:2023004:数学学院:19:女\n",
        "钱七:2023005:计算机与网络安全学院:20:男\n",
    };
    // Derive the count from the array so adding a record needs no other change.
    const size_t record_count = sizeof(records) / sizeof(records[0]);

    for (size_t i = 0; i < record_count; i++) {
        size_t len = strlen(records[i]);
        // write() can fail or write fewer bytes than requested.
        if (write(fd, records[i], len) != (ssize_t)len) {
            perror("write student.txt");
            close(fd);
            exit(1);
        }
    }
    if (close(fd) < 0) { perror("close student.txt"); exit(1); }

    // ---- Step 2: read back, filter by college, reorder the fields ----
    FILE *fin = fopen("student.txt", "r");
    if (fin == NULL) { perror("fopen student.txt"); exit(1); }

    FILE *fout = fopen("csStudent.txt", "w");
    if (fout == NULL) {
        perror("fopen csStudent.txt");
        fclose(fin);
        exit(1);
    }

    char line[256];
    while (fgets(line, sizeof(line), fin) != NULL) {
        char name[64], id[64], college[64], age[16], gender[16];

        // Every conversion carries a width one less than its buffer so an
        // over-long field cannot overflow; %15s also drops the trailing '\n'.
        int fields = sscanf(line, "%63[^:]:%63[^:]:%63[^:]:%15[^:]:%15s",
                            name, id, college, age, gender);
        if (fields != 5)
            continue;   // malformed line: skip instead of using garbage

        // Compare the parsed college field itself; searching the whole line
        // would also match the text appearing inside another field.
        if (strcmp(college, "计算机与网络安全学院") != 0)
            continue;

        // New field order: id:name:gender:age:college
        fprintf(fout, "%s:%s:%s:%s:%s\n", id, name, gender, age, college);
    }

    fclose(fin);
    // fclose flushes the stdio buffer, so a write error may only show up here.
    if (fclose(fout) != 0) { perror("fclose csStudent.txt"); exit(1); }
    return 0;
}
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 常见问题
|
|||
|
|
|
|||
|
|
| 问题 | 原因 | 解决方法 |
|
|||
|
|
|------|------|----------|
|
|||
|
|
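| 写入后文件内容为空或不完整 | `write` 的数据已进入内核,不会因为没有 `close` 而丢失;常见原因是 `open`/`write` 失败却未检查返回值,或使用标准 I/O(`fprintf`/`fwrite`)时忘记 `fclose`/`fflush`,数据仍留在用户空间缓冲区 | 检查 `open`/`write` 的返回值;标准 I/O 写完后务必 `fclose(fp)`;文件描述符用完后 `close(fd)` |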
|
|||
|
|
| 读取中文出现乱码 | 编码不匹配 | 确保源文件为 UTF-8 编码,终端 locale 一致 |
|
|||
|
|
| `strtok` 分割结果不对 | 行末换行符干扰 | 分割前先去除 `\n` |
|
|||
|
|
| `sscanf` 读取不完整 | 格式字符串匹配错误 | 使用 `%[^:]` 匹配非冒号字符序列 |
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
## 任务二:task42.c —— 结构体二进制文件读写
|
|||
|
|
|
|||
|
|
### 任务要求
|
|||
|
|
|
|||
|
|
1. 从键盘读入 5 个学生的信息(学号、姓名、语文、数学、英语成绩),存入结构体数组
|
|||
|
|
2. 将结构体数组以二进制方式写入文件 `score.dat`(使用 `write` 写入原始字节)
|
|||
|
|
3. 从文件中读取第 1、3、5 条记录并显示
|
|||
|
|
|
|||
|
|
### 关键代码提示
|
|||
|
|
|
|||
|
|
```c
|
|||
|
|
#include <fcntl.h>
|
|||
|
|
#include <unistd.h>
|
|||
|
|
#include <stdio.h>
|
|||
|
|
|
|||
|
|
/*
 * One student record. The struct is written to and read from score.dat as
 * raw bytes, so the on-disk record length is sizeof(Student), including any
 * padding the compiler inserts.
 */
typedef struct {
    int id;
    char name[32];   // NUL-terminated; at most 31 bytes of text
    float chinese;
    float math;
    float english;
} Student;

/*
 * task42: read 5 students from stdin, dump the array to score.dat in binary
 * form, then read the file back and print records 1, 3 and 5, followed by a
 * direct lseek() to record 3.
 *
 * Returns 0 on success, 1 on invalid input or any I/O failure.
 */
int main() {
    // Zero-initialise so unused bytes of name[] are not written to disk as
    // indeterminate values.
    Student stu[5] = {0};
    const int count = (int)(sizeof(stu) / sizeof(stu[0]));

    // Read the records from the keyboard.
    for (int i = 0; i < count; i++) {
        printf("请输入第%d个学生(学号 姓名 语文 数学 英语): ", i + 1);
        // %31s keeps the name inside name[32]; all five fields must convert.
        if (scanf("%d %31s %f %f %f", &stu[i].id, stu[i].name,
                  &stu[i].chinese, &stu[i].math, &stu[i].english) != 5) {
            fprintf(stderr, "invalid input for student %d\n", i + 1);
            return 1;
        }
    }

    // Binary write of the whole array.
    int fd = open("score.dat", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd < 0) { perror("open score.dat"); return 1; }
    if (write(fd, stu, sizeof(stu)) != (ssize_t)sizeof(stu)) {
        perror("write score.dat");
        close(fd);
        return 1;
    }
    if (close(fd) < 0) { perror("close score.dat"); return 1; }

    // Read sequentially and print records 1, 3, 5 (indices 0, 2, 4).
    fd = open("score.dat", O_RDONLY);
    if (fd < 0) { perror("open score.dat"); return 1; }

    Student temp;
    for (int i = 0; i < count; i++) {
        if (read(fd, &temp, sizeof(temp)) != (ssize_t)sizeof(temp)) {
            fprintf(stderr, "short read on record %d\n", i + 1);
            close(fd);
            return 1;
        }
        if (i == 0 || i == 2 || i == 4) {
            printf("学号:%d 姓名:%s 语文:%.1f 数学:%.1f 英语:%.1f\n",
                   temp.id, temp.name, temp.chinese, temp.math, temp.english);
        }
    }

    // Alternative: seek straight to record 3 (offset = record size * (n - 1)).
    if (lseek(fd, (off_t)(sizeof(Student) * 2), SEEK_SET) < 0) {
        perror("lseek score.dat");
        close(fd);
        return 1;
    }
    if (read(fd, &temp, sizeof(temp)) != (ssize_t)sizeof(temp)) {
        fprintf(stderr, "short read on record 3\n");
        close(fd);
        return 1;
    }
    printf("第3条: %s\n", temp.name);

    close(fd);
    return 0;
}
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 常见问题
|
|||
|
|
|
|||
|
|
| 问题 | 原因 | 解决方法 |
|
|||
|
|
|------|------|----------|
|
|||
|
|
| 读出的数值不对 | 结构体内存对齐(padding)使 `sizeof(Student)` 可能大于各字段大小之和 | 读、写以及 `lseek` 的偏移一律以 `sizeof(Student)` 为记录长度,不要手工累加各字段大小 |
|
|||
|
|
| `lseek` 定位不准 | 偏移量计算错误 | 偏移量 = `sizeof(Student) * (n - 1)` |
|
|||
|
|
| 中文姓名存储异常 | `char name[32]` 含结尾的 `\0`,UTF-8 下最多容纳 10 个汉字(一个汉字占 3 字节),更长的姓名会溢出 | 增大缓冲区,并在 `scanf` 中限制宽度(如 `%31s`) |
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
## 任务三:task43.c —— API 执行时间测量(选做)
|
|||
|
|
|
|||
|
|
### 任务要求
|
|||
|
|
|
|||
|
|
1. 分别测量 `read`/`write` 和 `fread`/`fwrite` 在不同数据量下的执行时间
|
|||
|
|
2. 对比系统调用与库函数的性能差异
|
|||
|
|
3. 绘制或输出性能对比表
|
|||
|
|
|
|||
|
|
### 关键代码提示
|
|||
|
|
|
|||
|
|
```c
|
|||
|
|
#include <sys/time.h>
|
|||
|
|
#include <fcntl.h>
|
|||
|
|
#include <unistd.h>
|
|||
|
|
#include <stdio.h>
|
|||
|
|
#include <stdlib.h>
|
|||
|
|
|
|||
|
|
/*
 * Elapsed time from *start to *end, in microseconds.
 *
 * Neither argument is modified. The result is negative when *end is earlier
 * than *start. The value is computed in `long`, so on platforms where long is
 * 32 bits it is only meaningful for intervals shorter than about 35 minutes.
 */
long time_diff(const struct timeval *start, const struct timeval *end) {
    long seconds = (long)(end->tv_sec - start->tv_sec);
    long micros = (long)(end->tv_usec - start->tv_usec);

    // micros may be negative (e.g. 1.9s -> 3.1s); the sum is still correct.
    return seconds * 1000000L + micros;
}
|
|||
|
|
|
|||
|
|
/*
 * task43: time N one-byte write() calls against N one-byte fwrite() calls and
 * print both durations in microseconds.
 *
 * Returns 0 on success, 1 if a file cannot be opened or a write fails
 * (a failed write would otherwise produce a meaningless timing).
 */
int main() {
    struct timeval start, end;
    int N = 1000000;        // number of one-byte writes per measurement
    char buf[1] = { 'x' };  // initialised: this byte is what gets written

    // Measure write(): one system call per byte.
    int fd = open("test.dat", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd < 0) { perror("open test.dat"); return 1; }

    gettimeofday(&start, NULL);
    for (int i = 0; i < N; i++) {
        if (write(fd, buf, 1) != 1) {
            perror("write test.dat");
            close(fd);
            return 1;
        }
    }
    gettimeofday(&end, NULL);
    close(fd);
    printf("write 逐字节: %ld 微秒\n", time_diff(&start, &end));

    // Measure fwrite(): one library call per byte, buffered in user space.
    FILE *fp = fopen("test2.dat", "w");
    if (fp == NULL) { perror("fopen test2.dat"); return 1; }

    gettimeofday(&start, NULL);
    for (int i = 0; i < N; i++) {
        if (fwrite(buf, 1, 1, fp) != 1) {
            perror("fwrite test2.dat");
            fclose(fp);
            return 1;
        }
    }
    gettimeofday(&end, NULL);
    // The end timestamp is taken before fclose(), so flushing whatever is
    // still in the stdio buffer is not part of the measured interval.
    fclose(fp);
    printf("fwrite 逐字节: %ld 微秒\n", time_diff(&start, &end));

    return 0;
}
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 测量方案
|
|||
|
|
|
|||
|
|
| 测量项 | 操作 | 说明 |
|
|||
|
|
|--------|------|------|
|
|||
|
|
| `write` | 逐字节写 1MB | 基准:每次陷入内核 |
|
|||
|
|
| `read` | 逐字节读 1MB | 基准:每次陷入内核 |
|
|||
|
|
| `fwrite` | 逐字节写 1MB | 带用户空间缓冲 |
|
|||
|
|
| `fread` | 逐字节读 1MB | 带用户空间缓冲 |
|
|||
|
|
| `write` | 块写入(4KB) | 对比块大小影响 |
|
|||
|
|
|
|||
|
|
### 常见问题
|
|||
|
|
|
|||
|
|
| 问题 | 原因 | 解决方法 |
|
|||
|
|
|------|------|----------|
|
|||
|
|
| 计时结果为 0 | 操作太快,微秒级精度不够 | 增加循环次数到百万级 |
|
|||
|
|
| 系统调用比库函数慢很多 | 每次 `read`/`write` 都陷入内核 | 正常现象,体现用户缓冲的价值 |
|
|||
|
|
| 结果波动大 | 系统调度干扰 | 多次测量取平均值 |
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
## 任务四:task44.c —— 英文文章词频统计
|
|||
|
|
|
|||
|
|
### 任务要求
|
|||
|
|
|
|||
|
|
1. 读取一篇英文文章(从文件或标准输入)
|
|||
|
|
2. 统计每个单词出现的次数
|
|||
|
|
3. 输出格式:`单词:次数`
|
|||
|
|
4. 按字典序排列所有单词
|
|||
|
|
5. 额外输出出现频度最高的 10 个单词
|
|||
|
|
|
|||
|
|
### 关键代码提示
|
|||
|
|
|
|||
|
|
```c
|
|||
|
|
#include <stdio.h>
|
|||
|
|
#include <string.h>
|
|||
|
|
#include <stdlib.h>
|
|||
|
|
#include <ctype.h>
|
|||
|
|
|
|||
|
|
#define MAX_WORDS 10000

// One dictionary entry: a NUL-terminated word and its occurrence count.
typedef struct {
    char word[64];
    int count;
} WordEntry;

// Fixed-capacity word table; entries [0, dict_size) are in use.
WordEntry dict[MAX_WORDS];
int dict_size = 0;

/*
 * Look up `word` in dict, inserting it if it is not present.
 *
 * If the word is already stored its count is incremented; otherwise a new
 * entry with count 1 is appended. Words longer than the entry buffer are
 * truncated to fit, and the truncated form is used for both lookup and
 * storage so repeated occurrences still land on the same entry.
 *
 * Returns the index of the entry, or -1 if `word` is NULL or the table
 * already holds MAX_WORDS entries and the word is not among them.
 */
int find_or_insert(const char *word) {
    if (word == NULL)
        return -1;

    // Bounded copy: snprintf always NUL-terminates and never overruns key.
    char key[sizeof dict[0].word];
    snprintf(key, sizeof key, "%s", word);

    for (int i = 0; i < dict_size; i++) {
        if (strcmp(dict[i].word, key) == 0) {
            dict[i].count++;
            return i;
        }
    }

    // Table full: refuse the insert instead of writing past the array.
    if (dict_size >= MAX_WORDS)
        return -1;

    strcpy(dict[dict_size].word, key);   // safe: key fits by construction
    dict[dict_size].count = 1;
    return dict_size++;
}
|
|||
|
|
|
|||
|
|
// qsort 比较函数:字典序
|
|||
|
|
int cmp_alpha(const void *a, const void *b) {
|
|||
|
|
return strcmp(((WordEntry *)a)->word, ((WordEntry *)b)->word);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// qsort 比较函数:频度降序
|
|||
|
|
int cmp_freq(const void *a, const void *b) {
|
|||
|
|
return ((WordEntry *)b)->count - ((WordEntry *)a)->count;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
int main() {
|
|||
|
|
FILE *fp = fopen("article.txt", "r");
|
|||
|
|
if (!fp) { perror("fopen"); return 1; }
|
|||
|
|
|
|||
|
|
char word[64];
|
|||
|
|
while (fscanf(fp, "%63s", word) == 1) {
|
|||
|
|
// 去除标点,统一小写
|
|||
|
|
char clean[64];
|
|||
|
|
int j = 0;
|
|||
|
|
for (int i = 0; word[i]; i++) {
|
|||
|
|
if (isalpha(word[i]))
|
|||
|
|
clean[j++] = tolower(word[i]);
|
|||
|
|
}
|
|||
|
|
clean[j] = '\0';
|
|||
|
|
if (j > 0)
|
|||
|
|
find_or_insert(clean);
|
|||
|
|
}
|
|||
|
|
fclose(fp);
|
|||
|
|
|
|||
|
|
// 按字典序输出
|
|||
|
|
qsort(dict, dict_size, sizeof(WordEntry), cmp_alpha);
|
|||
|
|
for (int i = 0; i < dict_size; i++)
|
|||
|
|
printf("%s:%d\n", dict[i].word, dict[i].count);
|
|||
|
|
|
|||
|
|
// 按频度降序输出前 10 个
|
|||
|
|
qsort(dict, dict_size, sizeof(WordEntry), cmp_freq);
|
|||
|
|
printf("\n频度最高的10个单词:\n");
|
|||
|
|
for (int i = 0; i < 10 && i < dict_size; i++)
|
|||
|
|
printf("%s:%d\n", dict[i].word, dict[i].count);
|
|||
|
|
|
|||
|
|
return 0;
|
|||
|
|
}
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 注意事项
|
|||
|
|
|
|||
|
|
- 单词提取时需过滤标点符号(逗号、句号、引号等)
|
|||
|
|
- 不区分大小写(统一转为小写)
|
|||
|
|
- 连字符(如 "well-known")可按需决定是否拆分
|
|||
|
|
- 文件较大时注意 `MAX_WORDS` 的上限,可改用动态分配
|
|||
|
|
|
|||
|
|
### 常见问题
|
|||
|
|
|
|||
|
|
| 问题 | 原因 | 解决方法 |
|
|||
|
|
|------|------|----------|
|
|||
|
|
| 单词带着标点 | 没有清理非字母字符 | 用 `isalpha` 逐字符过滤 |
|
|||
|
|
| 大小写被当成不同单词 | 未统一大小写 | 提取前用 `tolower` 转换 |
|
|||
|
|
| 排序结果不对 | `qsort` 比较函数写错 | 注意比较函数的参数类型转换 |
|
|||
|
|
| 数组越界 | 单词数超过 `MAX_WORDS` | 动态扩容(`realloc`)或增大数组 |
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
## 实验总结
|
|||
|
|
|
|||
|
|
通过本实验,应掌握以下能力:
|
|||
|
|
|
|||
|
|
1. 熟练使用底层 I/O(`open`/`read`/`write`)和标准 I/O(`fopen`/`fgets`/`fprintf`)
|
|||
|
|
2. 理解文件描述符与 `FILE *` 的区别
|
|||
|
|
3. 能用结构体进行二进制文件读写
|
|||
|
|
4. 了解系统调用与库函数的性能差异
|
|||
|
|
5. 综合运用字符串处理和文件 I/O 解决实际问题
|