Entity Framework是.NET平台下的一种简单易用的ORM框架,它既便于Domain Model和持久层的OO设计,也提高了代码的可维护性。但在使用中发现,有几类业务场景是EF不太擅长的,比如批量写入大量同类数据,为此本人做了一些对比测试,以供大家参考。
现假设我们需要做一个用户批量导入的功能,需要从某处导入1k~1w个User到SQLServer数据库,本人听说过的常见做法有如下几种:
- 使用ADO.NET单条SqlCommand执行1w次(根据常识作为EF的替代其性能还不够格,所以就不做测试了)
- 使用StringBuilder拼接SQL语句,将1w条Insert语句拼接成1到若干条SqlCommand执行
- 使用EntityFramework的基本功能进行插入
- 使用SqlBulkCopy进行批量插入
- 使用存储过程,其中的2种分支分别对应上述1、2用例,另外还有1种表参数存储过程。
数据库准备工作:
1 CREATE DATABASE BulkInsertTest 2 GO 3 4 USE BulkInsertTest 5 GO 6 7 CREATE TABLE [dbo].[User]( 8 [Id] [int] IDENTITY(1,1) NOT NULL, 9 [Name] [nvarchar](50) NOT NULL, 10 [Birthday] [date] NOT NULL, 11 [Gender] [char](1) NOT NULL, 12 [Email] [nvarchar](50) NOT NULL, 13 [Deleted] [bit] NOT NULL, 14 CONSTRAINT [PK_User] PRIMARY KEY CLUSTERED 15 ( 16 [Id] ASC 17 )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] 18 ) ON [PRIMARY] 19 20 GO 21 22 CREATE PROCEDURE [dbo].[InsertUser] 23 @Name nvarchar(50) 24 ,@Birthday date 25 ,@Gender char(1) 26 ,@Email nvarchar(50) 27 ,@Deleted bit 28 AS 29 BEGIN 30 INSERT INTO [BulkInsertTest].[dbo].[User] 31 ([Name] 32 ,[Birthday] 33 ,[Gender] 34 ,[Email] 35 ,[Deleted]) 36 VALUES 37 (@Name,@Birthday,@Gender,@Email,@Deleted) 38 39 END 40 41 /* Create a table type. */ 42 CREATE TYPE LocationTableType AS TABLE 43 ( Name nvarchar(50) 44 ,Birthday date 45 ,Gender char(1) 46 ,Email nvarchar(50) 47 ,Deleted bit ); 48 GO 49 50 /* Create a procedure to receive data for the table-valued parameter. */ 51 CREATE PROCEDURE [dbo].[InsertUsers] 52 @Users LocationTableType 53 AS 54 SET NOCOUNT ON 55 INSERT INTO [dbo].[User] 56 ([Name] 57 ,[Birthday] 58 ,[Gender] 59 ,[Email] 60 ,[Deleted]) 61 SELECT * 62 FROM @Users; 63 64 GO
创建DbContext和User Entity的C#代码:
1 using System; 2 using System.ComponentModel.DataAnnotations; 3 using System.ComponentModel.DataAnnotations.Schema; 4 using System.Data.Entity; 5 6 namespace ConsoleApplication5 7 { 8 public class MyDbContext : DbContext 9 { 10 public MyDbContext() : base("MyDbContext") { } 11 12 public MyDbContext(string connectionString) : 13 base(connectionString) 14 { 15 16 } 17 18 public DbSet<User> Users { get; set; } 19 } 20 21 [Table("User")] 22 public class User 23 { 24 [Key] 25 public int Id { get; set; } 26 27 public string Name { get; set; } 28 29 public DateTime Birthday { get; set; } 30 31 public string Gender { get; set; } 32 33 public string Email { get; set; } 34 35 public bool Deleted { get; set; } 36 } 37 }
测试程序C#代码:
1 using System; 2 using System.Data; 3 using System.Data.SqlClient; 4 using System.Diagnostics; 5 using System.Linq; 6 using System.Text; 7 8 namespace ConsoleApplication5 9 { 10 class Program 11 { 12 private const string ConnectionString = "Data Source=.;Initial Catalog=BulkInsertTest;User=sa;Password=IGTtest1"; 13 private const int Times = 10; 14 private const int Entries = 10000; 15 16 static void Main(string[] args) 17 { 18 long sumBulkCopyTime = 0, sumSqlCmdsTime = 0, sumMultiSpTime = 0, sumTableSpTime = 0, sumEfTime = 0; 19 long maxBulkCopyTime = 0, maxSqlCmdsTime = 0, maxMultiSpTime = 0, maxTableSpTime = 0, maxEfTime = 0; 20 for (int i = 0; i < Times; i++) 21 { 22 long bulkCopyTime = InsertBySqlBulkCopy(); 23 sumBulkCopyTime += bulkCopyTime; 24 maxBulkCopyTime = Math.Max(maxBulkCopyTime, bulkCopyTime); 25 26 long sqlCmdsTime = InsertBySqlCmds(); 27 sumSqlCmdsTime += sqlCmdsTime; 28 maxSqlCmdsTime = Math.Max(maxSqlCmdsTime, sqlCmdsTime); 29 30 long multiSpTime = InsertByMultiStoreProcedure(); 31 sumMultiSpTime += multiSpTime; 32 maxMultiSpTime = Math.Max(maxMultiSpTime, multiSpTime); 33 34 long tableSpTime = InsertByTableStoreProcedure(); 35 sumTableSpTime += tableSpTime; 36 maxTableSpTime = Math.Max(maxTableSpTime, tableSpTime); 37 38 long efTime = InsertByEntityFramework(); 39 sumEfTime += efTime; 40 maxEfTime = Math.Max(maxEfTime, efTime); 41 } 42 Console.WriteLine(new string('-', 40)); 43 Console.WriteLine("Time Cost of SqlBulkCopy: avg:{0}ms, max:{1}ms", sumBulkCopyTime / Times, maxBulkCopyTime); 44 Console.WriteLine("Time Cost of SqlCommands: avg:{0}ms, max:{1}ms", sumSqlCmdsTime / Times, maxSqlCmdsTime); 45 Console.WriteLine("Time Cost of MultiStoreProcedure: avg:{0}ms, max:{1}ms", sumMultiSpTime / Times, maxMultiSpTime); 46 Console.WriteLine("Time Cost of TableStoreProcedure: avg:{0}ms, max:{1}ms", sumTableSpTime / Times, maxTableSpTime); 47 Console.WriteLine("Time Cost of EntityFramework: avg:{0}ms, max:{1}ms", sumEfTime / Times, maxEfTime); 48 Console.ReadLine(); 49 } 50 51 private static long InsertBySqlCmds() 52 { 53 Stopwatch stopwatch = Stopwatch.StartNew(); 54 using (var connection = new SqlConnection(ConnectionString)) 55 { 56 SqlTransaction transaction = null; 57 connection.Open(); 58 try 59 { 60 transaction = connection.BeginTransaction(); 61 StringBuilder sb = new StringBuilder(); 62 for (int j = 0; j < Entries; j++) 63 { 64 sb.AppendFormat(@"INSERT INTO dbo.[User] ([Name],[Birthday],[Gender],[Email],[Deleted]) 65 VALUES('{0}','{1:yyyy-MM-dd}','{2}','{3}',{4});", "name" + j, DateTime.Now.AddDays(j), 'M', "user" + j + "@abc.com", 0); 66 } 67 var sqlCmd = connection.CreateCommand(); 68 sqlCmd.CommandText = sb.ToString(); 69 sqlCmd.Transaction = transaction; 70 sqlCmd.ExecuteNonQuery(); 71 transaction.Commit(); 72 } 73 catch 74 { 75 if (transaction != null) 76 { 77 transaction.Rollback(); 78 } 79 throw; 80 } 81 } 82 stopwatch.Stop(); 83 Console.WriteLine("SqlCommand time cost: {0}ms", stopwatch.ElapsedMilliseconds); 84 return stopwatch.ElapsedMilliseconds; 85 } 86 87 private static long InsertByMultiStoreProcedure() 88 { 89 Stopwatch stopwatch = Stopwatch.StartNew(); 90 using (var connection = new SqlConnection(ConnectionString)) 91 { 92 SqlTransaction transaction = null; 93 connection.Open(); 94 for (int i = 0; i < 10; i++) 95 { 96 try 97 { 98 transaction = connection.BeginTransaction(); 99 StringBuilder sb = new StringBuilder(); 100 for (int j = 0; j < Entries/10; j++) 101 { 102 sb.AppendFormat(@"EXECUTE [dbo].[InsertUser] '{0}','{1:yyyy-MM-dd}','{2}','{3}',{4};", 103 "name" + j, DateTime.Now.AddDays(j), 'M', "user" + j + "@abc.com", 0); 104 } 105 var sqlCmd = connection.CreateCommand(); 106 sqlCmd.CommandText = sb.ToString(); 107 sqlCmd.Transaction = transaction; 108 sqlCmd.ExecuteNonQuery(); 109 transaction.Commit(); 110 } 111 catch 112 { 113 if (transaction != null) 114 { 115 transaction.Rollback(); 116 } 117 throw; 118 } 119 } 120 } 121 stopwatch.Stop(); 122 Console.WriteLine("MultiStoreProcedure time cost: {0}ms", stopwatch.ElapsedMilliseconds); 123 return stopwatch.ElapsedMilliseconds; 124 } 125 126 private static long InsertByTableStoreProcedure() 127 { 128 Stopwatch stopwatch = Stopwatch.StartNew(); 129 var table = PrepareDataTable(); 130 using (var connection = new SqlConnection(ConnectionString)) 131 { 132 SqlTransaction transaction = null; 133 connection.Open(); 134 try 135 { 136 transaction = connection.BeginTransaction(); 137 var sqlCmd = connection.CreateCommand(); 138 sqlCmd.CommandText = "InsertUsers"; 139 sqlCmd.CommandType = CommandType.StoredProcedure; 140 sqlCmd.Parameters.Add(new SqlParameter("@Users", SqlDbType.Structured)); 141 sqlCmd.Parameters["@Users"].Value = table; 142 sqlCmd.Transaction = transaction; 143 sqlCmd.ExecuteNonQuery(); 144 transaction.Commit(); 145 } 146 catch 147 { 148 if (transaction != null) 149 { 150 transaction.Rollback(); 151 } 152 throw; 153 } 154 } 155 stopwatch.Stop(); 156 Console.WriteLine("TableStoreProcedure time cost: {0}ms", stopwatch.ElapsedMilliseconds); 157 return stopwatch.ElapsedMilliseconds; 158 } 159 160 private static long InsertBySqlBulkCopy() 161 { 162 Stopwatch stopwatch = Stopwatch.StartNew(); 163 164 var table = PrepareDataTable(); 165 SqlBulkCopy(table); 166 167 stopwatch.Stop(); 168 Console.WriteLine("SqlBulkCopy time cost: {0}ms", stopwatch.ElapsedMilliseconds); 169 return stopwatch.ElapsedMilliseconds; 170 } 171 172 private static DataTable PrepareDataTable() 173 { 174 DataTable table = new DataTable(); 175 table.Columns.Add("Name", typeof (string)); 176 table.Columns.Add("Birthday", typeof (DateTime)); 177 table.Columns.Add("Gender", typeof (char)); 178 table.Columns.Add("Email", typeof (string)); 179 table.Columns.Add("Deleted", typeof (bool)); 180 for (int i = 0; i < Entries; i++) 181 { 182 var row = table.NewRow(); 183 row["Name"] = "name" + i; 184 row["Birthday"] = DateTime.Now.AddDays(i); 185 row["Gender"] = 'M'; 186 row["Email"] = "user" + i + "@abc.com"; 187 row["Deleted"] = false; 188 table.Rows.Add(row); 189 } 190 return table; 191 } 192 193 private static void SqlBulkCopy(DataTable dataTable) 194 { 195 using (var connection = new SqlConnection(ConnectionString)) 196 { 197 SqlTransaction transaction = null; 198 connection.Open(); 199 try 200 { 201 transaction = connection.BeginTransaction(); 202 using (var sqlBulkCopy = new SqlBulkCopy(connection, SqlBulkCopyOptions.Default, transaction)) 203 { 204 sqlBulkCopy.BatchSize = dataTable.Rows.Count; 205 206 sqlBulkCopy.DestinationTableName = "[User]"; 207 //sqlBulkCopy.ColumnMappings.Add("Id", "Id"); 208 sqlBulkCopy.ColumnMappings.Add("Name", "Name"); 209 sqlBulkCopy.ColumnMappings.Add("Birthday", "Birthday"); 210 sqlBulkCopy.ColumnMappings.Add("Gender", "Gender"); 211 sqlBulkCopy.ColumnMappings.Add("Email", "Email"); 212 sqlBulkCopy.ColumnMappings.Add("Deleted", "Deleted"); 213 214 sqlBulkCopy.WriteToServer(dataTable); 215 } 216 transaction.Commit(); 217 } 218 catch 219 { 220 if (transaction!=null) 221 { 222 transaction.Rollback(); 223 } 224 throw; 225 } 226 } 227 } 228 229 private static long InsertByEntityFramework() 230 { 231 Stopwatch stopwatch = Stopwatch.StartNew(); 232 using (MyDbContext context = new MyDbContext(ConnectionString)) 233 { 234 context.Configuration.AutoDetectChangesEnabled = false; 235 context.Configuration.ValidateOnSaveEnabled = false; 236 for (int i = 0; i < Entries; i++) 237 { 238 context.Users.Add(new User() 239 { 240 Name = "name" + i, 241 Birthday = DateTime.Now.AddDays(i), 242 Gender = "F", 243 Email = "user" + i + "@abc.com", 244 Deleted = false 245 }); 246 } 247 context.SaveChanges(); 248 } 249 250 stopwatch.Stop(); 251 Console.WriteLine("EntityFramework time cost: {0}ms", stopwatch.ElapsedMilliseconds); 252 return stopwatch.ElapsedMilliseconds; 253 } 254 } 255 }
插入1000行测试结果:
插入10000行测试结果:
分析与结论:单从性能上来说,SqlBulkCopy和表参数StoreProcedure胜出,且完胜Entity Framework,所以当EF实在无法满足性能要求时,SqlBulkCopy或表参数SP可以很好的解决EF批量插入的性能问题。但衡量软件产品的标准不仅仅只有性能这一方面,比如我们还要在设计美学和性能之间进行权衡。当插入数据量较小或是低压力时间段自动执行插入的话,EF仍然是不错的选择。从代码可维护性方面来看ADO.NET实现的可读性、重构友好型都弱于EF实现,所以对于需求变动较多的领域模型而言这几种解决方法都需要更多的设计抽象和单元测试,以此来确保产品的持续发展。从影响范围来看,在ADO.NET实现方式中SqlBulkCopy和拼接Sql字符串的方案不需要额外加入存储过程,所以可以在不影响数据库部署的前提下与EF的实现相互替换。
关于SqlBulkCopy请参考:Bulk Copy Operations in SQL Server
为了比较优雅使用SqlBulkCopy,有人写了一种AsDataReader扩展方法请参考:LinqEntityDataReader
根据MSDN的说法,由于表参数存储过程的启动准备消耗时间较小,所以1k行(经验)以下插入性能将胜于SqlBulkCopy,而随着插入行数的增多,SqlBulkCopy的性能优势将体现出来,另外两种方案相比还有一些其他方面的差异,从本测试的实际结果来看,SqlBulkCopy在首次插入1k条数据时确实耗时稍长一点。具体请参考:Table-Valued Parameters vs. BULK INSERT Operations
另外还有人做过SqlBulkCopy和SqlDataAdapter插入的性能对比:High performance bulk loading to SQL Server using SqlBulkCopy
没有评论:
发表评论