Tech Support > Computer Hardware > Microprocessors > Heap fragmentation (2nd attempt)
Heap fragmentation (2nd attempt)
Posted by Thomas Ruschival on April 25th, 2006


Hi Groups,
unfortunately I am still trying to write a sample code that badly
fragments the heap and measure the time it take to allocate memory.
But this heap does not fragment, ant the time to allocate memory does not
increase. I don't know why. I tried different orders of malloc() free()
but I can't measure any runtime differences. In contrary, the first
mallocs on an unused heap take longer and later after some 1000
malloc/free actions the time used stabilizes on a lower value.
if I then after some time free all my allocated objects and then start
reallocating, time goes up for the first 1000 allocs and drops to the old
value. I don't know why.
either I am measureing the wrong stuff or the heap in linux 2.6 with glibc
does not fragment for any obscure reason.....

Can anyone tell me where the flaw in my model is?

below is the whole application code (tabsize 4), some parts are commented
out for testing diffent patterns. please play around a bit and tell me
what is wrong - if you can show fragmentation in any combination of
free()/malloc() runs please tell me how!

if the code is badly formatted you can download it at
http://www.ruschival.de/stuff/uni/heapfrag.c

Code follows: compile gcc -Wall -o heapfrag heapfrag.c

#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <sys/time.h>

#define VARSIZE

/* number of objects to be created */
#ifndef NO_HEAPOBJS
#define NO_HEAPOBJS 500000
#endif
/* maximum size of each object in Byte */
#ifndef MAX_OBJSIZE
#define MAX_OBJSIZE 1021 // nice primenumber
#endif
/* how many objects should be allocated per cycle */
#ifndef OBJS_PER_CYCLE
#define OBJS_PER_CYCLE NO_HEAPOBJS/10
#endif

/** Types used */

/* redeclare size_t do be compliant with IAS coding guidelines */
typedef size_t Tsize;

/* Struct to keep track of allocations and pointers to memory returned by
malloc() */ typedef struct memchunk{
unsigned int allocated;
unsigned int size;
void* memory;
} Tmemchunk;



/* Testprogram to allocate and deallocate memory randomly to fragment the
heap */ /* measures the time used to allocate OBJS_PER_CYCLE on the heap
*/ int main(int argc, char** argv){
int i=0,j=0; // generic counter
variable Tsize size=MAX_OBJSIZE/2; // size of new memory chunk to
allocate Tmemchunk objs[NO_HEAPOBJS]; // array to keep pointers to memory
chunks


/* Variables for statistics */
struct timeval programstart;// timestamp for total runtime of
program struct timeval starttime; // timestamp before
allocation struct timeval stoptime; // timestamp after allocation
of objects double delta; // time elapsed
for malloc() double runtime; // total
program runtime long cycle = 0; // counter
for cycles long objs_allocated=0; // track how many objects
crrently allocated long alloc_actions=0; // count malloc()
operations long free_actions=0; // count free() operations
Tsize total_size=0; // track bytes
allocated; char *filename = "timings.dat";

/* write pointer for test results */
FILE *fp;

// initialize all heap objects as free
for(i=0; i< NO_HEAPOBJS ; i++){
objs[i].allocated = 0;
objs[i].size = 0;
objs[i].memory = NULL;
}

//open target file
fp = fopen(filename,"w");
if(fp == NULL){
printf("failed opening file\n");
exit(1);
}

/* start timing */
gettimeofday(&programstart,NULL);

/* printf("Allocating %d objects on the heap for initial usage
\n",NO_HEAPOBJS/2); */ /* // start timer */
/* gettimeofday(&starttime,NULL); */
/* /\* Initially fill the heap to 50% *\/ */
/* for(i=0; i< NO_HEAPOBJS/2 ; i++){ */
/* #ifdef VARSIZE */
/* size = (Tsize)(random() % MAX_OBJSIZE); // random size
*/ /* #endif */
/* if( ( objs[i].memory= malloc(size)) !=
NULL ){ */ /* objs[i].allocated = 1; */
/* objs[i].size = size; */
/* total_size+=size; */
/* alloc_actions++; */
/* } */
/* else{ */
/* printf("malloc() failed! \n"); */
/* objs_allocated= alloc_actions-free_actions; */
/* printf("alloc actions %ld objs_allocated:
%ld total_size: %d \n",alloc_actions, objs_allocated,total_size); */ /*
exit(1); */ /* } */
/* } */
/* //stop timer */
/* gettimeofday(&stoptime,NULL); */
/* // Timing calculations */
/* delta = (stoptime.tv_sec - starttime.tv_sec); */
/* delta += (stoptime.tv_usec -
starttime.tv_usec)/(double)1000000; */ /* runtime = (stoptime.tv_sec
- programstart.tv_sec); */ /* runtime += (stoptime.tv_usec -
programstart.tv_usec)/(double)1000000; */ /* objs_allocated=
alloc_actions-free_actions; */ /* // printf("allocs \tfrees
\tobjs \tsize \t\tdelta[s] \truntime[s] \tcycle\n"); */ /*
fprintf(fp,"%ld \t%ld \t%ld \t%d \t%lf \t%lf
\t%ld\n",alloc_actions,free_actions, objs_allocated, */ /*
total_size,delta,runtime,cycle); */


printf("Starting infinite run\n");
/* heap fragmenst so quickly 100 runs are more than enough*/
while(cycle < 1000){
cycle++;
alloc_actions=0;
free_actions=0;
// if too many objects allocated -> free some space
/* if(objs_allocated >= (NO_HEAPOBJS-OBJS_PER_CYCLE)){ */
/* // free randomly twice the memory we allocate
in a cycle */ /* for(i=0; i<(OBJS_PER_CYCLE);
i++){ */ /* // randomly pick allocated
space */ /* do{ */
/* j = random()%NO_HEAPOBJS; */
/* }while(objs[j].allocated == 0); */
/* // piece of memory found --> deallocate
*/ /* objs[j].allocated = 0; */
/* total_size -= objs[j].size; */
/* free(objs[j].memory); */
/* objs_allocated--; */
/* // printf("freeing memory \n"); */
/* } // end deallocation for */
/* }// end if not enough space left */

// start timer
gettimeofday(&starttime,NULL);
for(i=0; i<OBJS_PER_CYCLE; i++){
#ifdef VARSIZE
size = (Tsize)(random() % MAX_OBJSIZE); // random
size #endif
j = random()%NO_HEAPOBJS; // pick random
index if( objs[j].allocated == 1 ){ // if allocated -> free
// only every 2nd attempt we actually free
memory. // if( random()%2 == 0){
objs[j].allocated = 0;
total_size -= objs[j].size;
free(objs[j].memory);
free_actions++;
objs_allocated--;
// }
/* else{ // reallocate -> time goes up
because mem is copied */ /* objs[j].memory =
realloc(objs[j].memory,size); */ /*
total_size -= objs[j].size; */ /* total_size+=size; */
/* objs[j].size = size; */
/* } */
}
// else{ // try to allocate
if( (objs[j].memory = malloc(size)) !=
NULL ){ objs[j].allocated = 1;
objs[j].size = size;
total_size+=size;
objs_allocated++;
alloc_actions++;
}
else{
printf("malloc() failed! \n");
objs_allocated= alloc_actions-free_actions;
printf("alloc actions %ld objs_allocated: %ld
total_size: %d \n", alloc_actions, objs_allocated,total_size);
exit(1);
}// end if try to allocate
// }// end if allocated
}// end for
//stop timer
gettimeofday(&stoptime,NULL);
// Timing calculations
delta = (stoptime.tv_sec - starttime.tv_sec);
delta += (stoptime.tv_usec -
starttime.tv_usec)/(double)1000000; runtime = (stoptime.tv_sec -
programstart.tv_sec); runtime += (stoptime.tv_usec -
programstart.tv_usec)/(double)1000000;

fprintf(fp,"%ld \t%ld \t%ld \t%d \t%lf \t%lf
\t%ld\n",alloc_actions,free_actions, objs_allocated,
total_size,delta,runtime,cycle); // free all at once
if(objs_allocated == NO_HEAPOBJS){
for(i=0; i< NO_HEAPOBJS ; i++){
objs[i].allocated = 0;
objs[i].size = 0;
free(objs[i].memory);

}
total_size=0;
objs_allocated=0;
}
} // end infinite loop
return 0;
}

Posted by CBFalconer on April 25th, 2006


Thomas Ruschival wrote:
To start with, your code does not survive transmission as a usenet
article because lines are overlong (should be limited to about 65
chars) and it uses // comments. These do not survive line wraps,
so nobody is going to try to untangle the mess. It also uses
non-standard headers, such as <sys/time.h>, which are unknown
quantities to most readers here.

Secondly, heap fragmentation depends on the malloc package much
more than the sequence of calls. A good package will be relatively
impervious to the call sequence. The place you should be looking
is the source code to your own malloc package.

One of the biggest delays in many packages is the O(N*N) timing of
the free operation, where N is the number of blocks assigned. This
can be reduced to O(N) by making the actual free O(1). It has
nothing to do with fragmentation. For an example of a malloc
package that handles this (for DJGPP, and probably a large class of
*IX based systems also) see my nmalloc package, available at:

<http://cbfalconer.home.att.net/nmalloc.zip>

Remember, the actual malloc package cannot be portable by its very
nature, which is why it is supplied by the system. The nmalloc
package includes test drivers which can assign and free many random
sizes, and allows you take random snapshots of the result. The
guarded debuggery is written to be specific to gcc, but if you
remove that the result should still be useful.

--
"If you want to post a followup via groups.google.com, don't use
the broken "Reply" link at the bottom of the article. Click on
"show options" at the top of the article, then click on the
"Reply" at the bottom of the article headers." - Keith Thompson
More details at: <http://cfaj.freeshell.org/google/>
Also see <http://www.safalra.com/special/googlegroupsreply/>



Posted by Matthias on April 26th, 2006



Thomas Ruschival wrote:

This looks like your application is "getting used" to its job, i.e.
maybe some kind of caching or clever pre-allocation is performed by
your environment. Maybe it is simply the CPU getting most of your code
into its cache after so many repetitions. Just an idea...

Regards,
Matthias


Posted by Thomas Ruschival on April 26th, 2006


Hi CBFalconer,
thanks alot for more insight!

very important
http://cbfalconer.home.att.net/nmalloc.zip
doesn't work --> 404 No such file!

C> To start with, your code does not survive transmission as a usenet
C> article because lines are overlong (should be limited to about 65
C> chars) and it uses // comments.
Sorry, I thought 80 chars is standard but as I posted I saw the mess.

C> It also usesnon-standard headers, such as <sys/time.h>, which are
C> unknown quantities to most readers here.
I didn't know this wither I have currently no access to other
operating systems. I only have Linux and Cygwin on windows (which can't
use the microseconds from timeval because of the bad clock granularity.

C> Secondly, heap fragmentation depends on the malloc package much
C> more than the sequence of calls. A good package will be relatively
C> impervious to the call sequence. The place you should be looking
C> is the source code to your own malloc package.
Thanks for the hint on the malloc source.
In University I learnt and understood that regardless the allocation
scheme the will be fragmentation of the heap using random sizes in
allocation. I can only avoid it completetly with several heaps that have
constant blocksizes. or a GC that does compactification once in a while.
At my current knowledge I think the heap is a simple list whose elements
look like
struct memchunk{
struct memchunk next;
unsigned int size;
}
followed by size words of data.
And every time malloc is called it follows the list until
(size-sizeof(memchunk)) >= required size form malloc()
therefore it inevitably collects many very small chunks at the beginning
of the list. If it doesn't find a machting chunk of memory it requests
a new memory location from the OS.

C> One of the biggest delays in many packages is the O(N*N) timing of
C> the free operation, where N is the number of blocks assigned. This
Why is free O(N*N) isn't it just simply adding the freed chunk at the end
of the list?
C> can be reduced to O(N) by making the actual free O(1).
is this what you are talking about?

C> <http://cbfalconer.home.att.net/nmalloc.zip>
sorry link is broken!

Posted by Thomas Ruschival on April 26th, 2006


Hi Matthias
M> This looks like your application is "getting used" to its job, i.e.
M> maybe some kind of caching or clever pre-allocation is performed by
M> your environment.
could be.
I read something about low fragmenting heaps.
or its is using some kind of paged heap therefore getting constant
blocksize which does not fragment.

M> Maybe it is simply the CPU getting most of your code
M> into its cache after so many repetitions. Just an idea...
I doubt it my CPU doesn't have 150 MB cache and still it would have to
dereference a lot following the freelist.

Time to look into the malloc code

Thomas

Posted by CBFalconer on April 27th, 2006


Thomas Ruschival wrote:
First, the link was wrong. Try:

<http://cbfalconer.home.att.net/download/nmalloc.zip>

Second, the N*N performance is due to free attempting to stitch
together adjacent parcels of memory, and thus converting adjacent
small parcels into single large parcels. In many systems this
requires an exhaustive search of all the allocated and free
parcels. My code keeps track of things such that all such
combinations are known immediately, and thus free becomes O(1). It
was born out of frustration with the existing system. The cost is
extra storage overhead of about 8 bytes per allocation, but the
benefits include easy heap condition checking and debugging, not to
mention speed.



--
"If you want to post a followup via groups.google.com, don't use
the broken "Reply" link at the bottom of the article. Click on
"show options" at the top of the article, then click on the
"Reply" at the bottom of the article headers." - Keith Thompson
More details at: <http://cfaj.freeshell.org/google/>
Also see <http://www.safalra.com/special/googlegroupsreply/>



Posted by Thomas Ruschival on April 27th, 2006


Thanks,
I think I found the answer. in the gnu glibc manual sections
3.2.2.6 "efficiency considerations for malloc"
quote:
"As opposed to other versions, the malloc in the GNU C Library does not
round up block sizes to powers of two, neither for large nor for small
sizes. Neighboring chunks can be coalesced on a free no matter what their
size is. This makes the implementation suitable for all kinds of
allocation patterns without generally incurring high memory waste through
fragmentation."
This sound exacly like the method you just explained.

I will now measure the time free() takes and use your package for tracing
the actions.

Thanks alot.
Thomas

Posted by Matthias on April 27th, 2006



Thomas Ruschival wrote:

I hope your *code* for memory management is less than 150MB ;-)
But you are right, the data required for the book keeping may exceed
the CPU's cache.

Regards,
Matthias



Similar Posts